diff --git a/.github/workflows/multi_device.yml b/.github/workflows/multi_device.yml index ebdb982148..3be4d55e88 100644 --- a/.github/workflows/multi_device.yml +++ b/.github/workflows/multi_device.yml @@ -63,4 +63,4 @@ jobs: - name: Test adapters working-directory: ${{github.workspace}}/build - run: env UR_CTS_ADAPTER_PLATFORM="${{matrix.adapter.platform}}" ctest -C ${{matrix.build_type}} --output-on-failure -L "conformance" -E "enqueue|kernel|program|integration|exp_command_buffer|exp_enqueue_native|exp_launch_properties|exp_usm_p2p" --timeout 180 + run: env UR_CTS_ADAPTER_PLATFORM="${{matrix.adapter.platform}}" ctest -C ${{matrix.build_type}} --output-on-failure -L "conformance" -E "enqueue|kernel|integration|exp_command_buffer|exp_enqueue_native|exp_launch_properties|exp_usm_p2p" --timeout 180 diff --git a/include/ur_api.h b/include/ur_api.h index 8731d78c00..ca58a7ac66 100644 --- a/include/ur_api.h +++ b/include/ur_api.h @@ -4202,17 +4202,19 @@ urProgramCreateWithIL( ); /////////////////////////////////////////////////////////////////////////////// -/// @brief Create a program object from device native binary. +/// @brief Create a program object from native binaries for the specified +/// devices. /// /// @details /// - The application may call this function from simultaneous threads. /// - Following a successful call to this entry point, `phProgram` will -/// contain a binary of type ::UR_PROGRAM_BINARY_TYPE_COMPILED_OBJECT or -/// ::UR_PROGRAM_BINARY_TYPE_LIBRARY for `hDevice`. -/// - The device specified by `hDevice` must be device associated with +/// contain binaries of type ::UR_PROGRAM_BINARY_TYPE_COMPILED_OBJECT or +/// ::UR_PROGRAM_BINARY_TYPE_LIBRARY for the specified devices in +/// `phDevices`. +/// - The devices specified by `phDevices` must be associated with the /// context. /// - The adapter may (but is not required to) perform validation of the -/// provided module during this call. +/// provided modules during this call. /// /// @remarks /// _Analogues_ @@ -4225,21 +4227,27 @@ urProgramCreateWithIL( /// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC /// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE /// + `NULL == hContext` -/// + `NULL == hDevice` /// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER -/// + `NULL == pBinary` +/// + `NULL == phDevices` +/// + `NULL == pLengths` +/// + `NULL == ppBinaries` /// + `NULL == phProgram` /// + `NULL != pProperties && pProperties->count > 0 && NULL == pProperties->pMetadatas` /// - ::UR_RESULT_ERROR_INVALID_SIZE /// + `NULL != pProperties && NULL != pProperties->pMetadatas && pProperties->count == 0` +/// + `numDevices == 0` /// - ::UR_RESULT_ERROR_INVALID_NATIVE_BINARY -/// + If `pBinary` isn't a valid binary for `hDevice.` +/// + If any binary in `ppBinaries` isn't a valid binary for the corresponding device in `phDevices.` UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithBinary( ur_context_handle_t hContext, ///< [in] handle of the context instance - ur_device_handle_t hDevice, ///< [in] handle to device associated with binary. - size_t size, ///< [in] size in bytes. - const uint8_t *pBinary, ///< [in] pointer to binary. + uint32_t numDevices, ///< [in] number of devices + ur_device_handle_t *phDevices, ///< [in][range(0, numDevices)] a pointer to a list of device handles. The + ///< binaries are loaded for devices specified in this list. + size_t *pLengths, ///< [in][range(0, numDevices)] array of sizes of program binaries + ///< specified by `pBinaries` (in bytes). + const uint8_t **ppBinaries, ///< [in][range(0, numDevices)] pointer to program binaries to be loaded + ///< for devices specified by `phDevices`. const ur_program_properties_t *pProperties, ///< [in][optional] pointer to program creation properties. ur_program_handle_t *phProgram ///< [out] pointer to handle of Program object created. ); @@ -10325,9 +10333,10 @@ typedef struct ur_program_create_with_il_params_t { /// allowing the callback the ability to modify the parameter's value typedef struct ur_program_create_with_binary_params_t { ur_context_handle_t *phContext; - ur_device_handle_t *phDevice; - size_t *psize; - const uint8_t **ppBinary; + uint32_t *pnumDevices; + ur_device_handle_t **pphDevices; + size_t **ppLengths; + const uint8_t ***pppBinaries; const ur_program_properties_t **ppProperties; ur_program_handle_t **pphProgram; } ur_program_create_with_binary_params_t; diff --git a/include/ur_ddi.h b/include/ur_ddi.h index b4d6f2bade..80a0003fca 100644 --- a/include/ur_ddi.h +++ b/include/ur_ddi.h @@ -284,9 +284,10 @@ typedef ur_result_t(UR_APICALL *ur_pfnProgramCreateWithIL_t)( /// @brief Function-pointer for urProgramCreateWithBinary typedef ur_result_t(UR_APICALL *ur_pfnProgramCreateWithBinary_t)( ur_context_handle_t, - ur_device_handle_t, - size_t, - const uint8_t *, + uint32_t, + ur_device_handle_t *, + size_t *, + const uint8_t **, const ur_program_properties_t *, ur_program_handle_t *); diff --git a/include/ur_print.hpp b/include/ur_print.hpp index a443e04f2f..22c9683840 100644 --- a/include/ur_print.hpp +++ b/include/ur_print.hpp @@ -11179,21 +11179,44 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct *(params->phContext)); os << ", "; - os << ".hDevice = "; + os << ".numDevices = "; - ur::details::printPtr(os, - *(params->phDevice)); + os << *(params->pnumDevices); os << ", "; - os << ".size = "; + os << ".phDevices = {"; + for (size_t i = 0; *(params->pphDevices) != NULL && i < *params->pnumDevices; ++i) { + if (i != 0) { + os << ", "; + } - os << *(params->psize); + ur::details::printPtr(os, + (*(params->pphDevices))[i]); + } + os << "}"; os << ", "; - os << ".pBinary = "; + os << ".pLengths = {"; + for (size_t i = 0; *(params->ppLengths) != NULL && i < *params->pnumDevices; ++i) { + if (i != 0) { + os << ", "; + } - ur::details::printPtr(os, - *(params->ppBinary)); + os << (*(params->ppLengths))[i]; + } + os << "}"; + + os << ", "; + os << ".ppBinaries = {"; + for (size_t i = 0; *(params->pppBinaries) != NULL && i < *params->pnumDevices; ++i) { + if (i != 0) { + os << ", "; + } + + ur::details::printPtr(os, + (*(params->pppBinaries))[i]); + } + os << "}"; os << ", "; os << ".pProperties = "; diff --git a/scripts/core/program.yml b/scripts/core/program.yml index 23f07d4287..769a312f1d 100644 --- a/scripts/core/program.yml +++ b/scripts/core/program.yml @@ -119,7 +119,7 @@ returns: - "`length == 0`" --- #-------------------------------------------------------------------------- type: function -desc: "Create a program object from device native binary." +desc: "Create a program object from native binaries for the specified devices." class: $xProgram name: CreateWithBinary decl: static @@ -128,22 +128,25 @@ analogue: - "**clCreateProgramWithBinary**" details: - "The application may call this function from simultaneous threads." - - "Following a successful call to this entry point, `phProgram` will contain a binary of type $X_PROGRAM_BINARY_TYPE_COMPILED_OBJECT or $X_PROGRAM_BINARY_TYPE_LIBRARY for `hDevice`." - - "The device specified by `hDevice` must be device associated with context." - - "The adapter may (but is not required to) perform validation of the provided module during this call." + - "Following a successful call to this entry point, `phProgram` will contain binaries of type $X_PROGRAM_BINARY_TYPE_COMPILED_OBJECT or $X_PROGRAM_BINARY_TYPE_LIBRARY for the specified devices in `phDevices`." + - "The devices specified by `phDevices` must be associated with the context." + - "The adapter may (but is not required to) perform validation of the provided modules during this call." params: - type: $x_context_handle_t name: hContext desc: "[in] handle of the context instance" - - type: $x_device_handle_t - name: hDevice - desc: "[in] handle to device associated with binary." - - type: size_t - name: size - desc: "[in] size in bytes." - - type: const uint8_t* - name: pBinary - desc: "[in] pointer to binary." + - type: uint32_t + name: numDevices + desc: "[in] number of devices" + - type: $x_device_handle_t* + name: phDevices + desc: "[in][range(0, numDevices)] a pointer to a list of device handles. The binaries are loaded for devices specified in this list." + - type: size_t* + name: pLengths + desc: "[in][range(0, numDevices)] array of sizes of program binaries specified by `pBinaries` (in bytes)." + - type: const uint8_t** + name: ppBinaries + desc: "[in][range(0, numDevices)] pointer to program binaries to be loaded for devices specified by `phDevices`." - type: const $x_program_properties_t* name: pProperties desc: "[in][optional] pointer to program creation properties." @@ -155,8 +158,9 @@ returns: - "`NULL != pProperties && pProperties->count > 0 && NULL == pProperties->pMetadatas`" - $X_RESULT_ERROR_INVALID_SIZE: - "`NULL != pProperties && NULL != pProperties->pMetadatas && pProperties->count == 0`" + - "`numDevices == 0`" - $X_RESULT_ERROR_INVALID_NATIVE_BINARY: - - "If `pBinary` isn't a valid binary for `hDevice.`" + - "If any binary in `ppBinaries` isn't a valid binary for the corresponding device in `phDevices.`" --- #-------------------------------------------------------------------------- type: function desc: "Produces an executable program from one program, negates need for the linking step." diff --git a/source/adapters/cuda/program.cpp b/source/adapters/cuda/program.cpp index a475d43ce2..4b963a737a 100644 --- a/source/adapters/cuda/program.cpp +++ b/source/adapters/cuda/program.cpp @@ -493,12 +493,15 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramGetNativeHandle( } UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithBinary( - ur_context_handle_t hContext, ur_device_handle_t hDevice, size_t size, - const uint8_t *pBinary, const ur_program_properties_t *pProperties, + ur_context_handle_t hContext, uint32_t numDevices, + ur_device_handle_t *phDevices, size_t *pLengths, const uint8_t **ppBinaries, + const ur_program_properties_t *pProperties, ur_program_handle_t *phProgram) { + if (numDevices > 1) + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; - UR_CHECK_ERROR( - createProgram(hContext, hDevice, size, pBinary, pProperties, phProgram)); + UR_CHECK_ERROR(createProgram(hContext, phDevices[0], pLengths[0], + ppBinaries[0], pProperties, phProgram)); (*phProgram)->BinaryType = UR_PROGRAM_BINARY_TYPE_COMPILED_OBJECT; return UR_RESULT_SUCCESS; diff --git a/source/adapters/hip/program.cpp b/source/adapters/hip/program.cpp index b1d7d28c47..4c4f2b2766 100644 --- a/source/adapters/hip/program.cpp +++ b/source/adapters/hip/program.cpp @@ -480,9 +480,16 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramGetNativeHandle( /// /// Note: Only supports one device UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithBinary( - ur_context_handle_t hContext, ur_device_handle_t hDevice, size_t size, - const uint8_t *pBinary, const ur_program_properties_t *pProperties, + ur_context_handle_t hContext, uint32_t numDevices, + ur_device_handle_t *phDevices, size_t *pLengths, const uint8_t **ppBinaries, + const ur_program_properties_t *pProperties, ur_program_handle_t *phProgram) { + if (numDevices > 1) + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; + + auto hDevice = phDevices[0]; + auto pBinary = ppBinaries[0]; + auto size = pLengths[0]; UR_ASSERT(std::find(hContext->getDevices().begin(), hContext->getDevices().end(), hDevice) != hContext->getDevices().end(), diff --git a/source/adapters/level_zero/kernel.cpp b/source/adapters/level_zero/kernel.cpp index c77bb22b8c..edba11c4a2 100644 --- a/source/adapters/level_zero/kernel.cpp +++ b/source/adapters/level_zero/kernel.cpp @@ -495,18 +495,11 @@ ur_result_t urEnqueueDeviceGlobalVariableWrite( ///< this particular kernel execution instance. ) { std::scoped_lock lock(Queue->Mutex); - - ze_module_handle_t ZeModule{}; - auto It = Program->ZeModuleMap.find(Queue->Device->ZeDevice); - if (It != Program->ZeModuleMap.end()) { - ZeModule = It->second; - } else { - ZeModule = Program->ZeModule; - } - // Find global variable pointer size_t GlobalVarSize = 0; void *GlobalVarPtr = nullptr; + ze_module_handle_t ZeModule = + Program->getZeModuleHandle(Queue->Device->ZeDevice); ZE2UR_CALL(zeModuleGetGlobalPointer, (ZeModule, Name, &GlobalVarSize, &GlobalVarPtr)); if (GlobalVarSize < Offset + Count) { @@ -557,15 +550,8 @@ ur_result_t urEnqueueDeviceGlobalVariableRead( ///< this particular kernel execution instance. ) { std::scoped_lock lock(Queue->Mutex); - - ze_module_handle_t ZeModule{}; - auto It = Program->ZeModuleMap.find(Queue->Device->ZeDevice); - if (It != Program->ZeModuleMap.end()) { - ZeModule = It->second; - } else { - ZeModule = Program->ZeModule; - } - + ze_module_handle_t ZeModule = + Program->getZeModuleHandle(Queue->Device->ZeDevice); // Find global variable pointer size_t GlobalVarSize = 0; void *GlobalVarPtr = nullptr; @@ -603,10 +589,6 @@ ur_result_t urKernelCreate( *RetKernel ///< [out] pointer to handle of kernel object created. ) { std::shared_lock Guard(Program->Mutex); - if (Program->State != ur_program_handle_t_::state::Exe) { - return UR_RESULT_ERROR_INVALID_PROGRAM_EXECUTABLE; - } - try { ur_kernel_handle_t_ *UrKernel = new ur_kernel_handle_t_(true, Program); *RetKernel = reinterpret_cast(UrKernel); @@ -616,8 +598,14 @@ ur_result_t urKernelCreate( return UR_RESULT_ERROR_UNKNOWN; } - for (auto It : Program->ZeModuleMap) { - auto ZeModule = It.second; + for (auto &Dev : Program->AssociatedDevices) { + auto ZeDevice = Dev->ZeDevice; + // Program may be associated with all devices from the context but built + // only for subset of devices. + if (Program->getState(ZeDevice) != ur_program_handle_t_::state::Exe) + continue; + + auto ZeModule = Program->getZeModuleHandle(ZeDevice); ZeStruct ZeKernelDesc; ZeKernelDesc.flags = 0; ZeKernelDesc.pKernelName = KernelName; @@ -632,8 +620,6 @@ ur_result_t urKernelCreate( return ze2urResult(ZeResult); } - auto ZeDevice = It.first; - // Store the kernel in the ZeKernelMap so the correct // kernel can be retrieved later for a specific device // where a queue is being submitted. @@ -651,6 +637,9 @@ ur_result_t urKernelCreate( (*RetKernel)->ZeKernelMap[ZeSubDevice] = ZeKernel; } } + // There is no any successfully built executable for program. + if ((*RetKernel)->ZeKernelMap.empty()) + return UR_RESULT_ERROR_INVALID_PROGRAM_EXECUTABLE; (*RetKernel)->ZeKernel = (*RetKernel)->ZeKernelMap.begin()->second; diff --git a/source/adapters/level_zero/program.cpp b/source/adapters/level_zero/program.cpp index 5f5ec387a0..d7adc5eb37 100644 --- a/source/adapters/level_zero/program.cpp +++ b/source/adapters/level_zero/program.cpp @@ -83,15 +83,21 @@ ur_result_t urProgramCreateWithIL( } ur_result_t urProgramCreateWithBinary( - ur_context_handle_t Context, ///< [in] handle of the context instance + ur_context_handle_t hContext, ///< [in] handle of the context instance + uint32_t numDevices, ///< [in] number of devices ur_device_handle_t - Device, ///< [in] handle to device associated with binary. - size_t Size, ///< [in] size in bytes. - const uint8_t *Binary, ///< [in] pointer to binary. - const ur_program_properties_t - *Properties, ///< [in][optional] pointer to program creation properties. + *phDevices, ///< [in][range(0, numDevices)] a pointer to a list of + ///< device handles. The binaries are loaded for devices + ///< specified in this list. + size_t *pLengths, ///< [in][range(0, numDevices)] array of sizes of program + ///< binaries specified by `pBinaries` (in bytes). + const uint8_t * + *ppBinaries, ///< [in][range(0, numDevices)] pointer to program binaries + ///< to be loaded for devices specified by `phDevices`. + const ur_program_properties_t * + pProperties, ///< [in][optional] pointer to program creation properties. ur_program_handle_t - *Program ///< [out] pointer to handle of Program object created. + *phProgram ///< [out] pointer to handle of Program object created. ) { // In OpenCL, clCreateProgramWithBinary() can be used to load any of the // following: "program executable", "compiled program", or "library of @@ -103,19 +109,22 @@ ur_result_t urProgramCreateWithBinary( // somehow examine the binary image to distinguish the cases. Alternatively, // we could change the PI interface and have the caller pass additional // information to distinguish the cases. - try { - ur_program_handle_t_ *UrProgram = - new ur_program_handle_t_(ur_program_handle_t_::Native, Context, Device, - Properties, Binary, Size); - *Program = reinterpret_cast(UrProgram); + for (uint32_t i = 0; i < numDevices; i++) { + UR_ASSERT(ppBinaries[i] || !pLengths[0], UR_RESULT_ERROR_INVALID_VALUE); + UR_ASSERT(hContext->isValidDevice(phDevices[i]), + UR_RESULT_ERROR_INVALID_DEVICE); + } + ur_program_handle_t_ *UrProgram = new ur_program_handle_t_( + ur_program_handle_t_::Native, hContext, numDevices, phDevices, + pProperties, ppBinaries, pLengths); + *phProgram = reinterpret_cast(UrProgram); + return UR_RESULT_SUCCESS; } catch (const std::bad_alloc &) { return UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; } catch (...) { return UR_RESULT_ERROR_UNKNOWN; } - - return UR_RESULT_SUCCESS; } ur_result_t urProgramBuild( @@ -143,27 +152,9 @@ ur_result_t urProgramBuildExp( // UR_ASSERT(Program->Context->isValidDevice(Devices[0]), // UR_RESULT_ERROR_INVALID_VALUE); - // We should have either IL or native device code. - UR_ASSERT(hProgram->Code, UR_RESULT_ERROR_INVALID_PROGRAM); - - // It is legal to build a program created from either IL or from native - // device code. - if (hProgram->State != ur_program_handle_t_::IL && - hProgram->State != ur_program_handle_t_::Native) { - return UR_RESULT_ERROR_INVALID_OPERATION; - } - std::scoped_lock Guard(hProgram->Mutex); - // Ask Level Zero to build and load the native code onto the device. - ZeStruct ZeModuleDesc; ur_program_handle_t_::SpecConstantShim Shim(hProgram); - ZeModuleDesc.format = (hProgram->State == ur_program_handle_t_::IL) - ? ZE_MODULE_FORMAT_IL_SPIRV - : ZE_MODULE_FORMAT_NATIVE; - - ZeModuleDesc.inputSize = hProgram->CodeLength; - ZeModuleDesc.pInputModule = hProgram->Code.get(); // if large allocations are selected, then pass // ze-opt-greater-than-4GB-buffer-required to disable @@ -178,24 +169,45 @@ ur_result_t urProgramBuildExp( ZeBuildOptions += " -ze-opt-greater-than-4GB-buffer-required"; } - ZeModuleDesc.pBuildFlags = ZeBuildOptions.c_str(); - ZeModuleDesc.pConstants = Shim.ze(); ur_result_t Result = UR_RESULT_SUCCESS; - for (uint32_t i = 0; i < numDevices; i++) { + ZeStruct ZeModuleDesc; + ZeModuleDesc.pBuildFlags = ZeBuildOptions.c_str(); + ZeModuleDesc.pConstants = Shim.ze(); ze_device_handle_t ZeDevice = phDevices[i]->ZeDevice; + auto State = hProgram->getState(ZeDevice); + + // We don't want to rebuild the module if it was already built. + if (State == ur_program_handle_t_::Exe) + continue; + + // It is legal to build a program created from either IL or from native + // device code. + if (State != ur_program_handle_t_::IL && + State != ur_program_handle_t_::Native) + return UR_RESULT_ERROR_INVALID_OPERATION; + + // We should have either IL or native device code. + auto Code = hProgram->getCode(ZeDevice); + UR_ASSERT(Code, UR_RESULT_ERROR_INVALID_PROGRAM); + + ZeModuleDesc.format = (State == ur_program_handle_t_::IL) + ? ZE_MODULE_FORMAT_IL_SPIRV + : ZE_MODULE_FORMAT_NATIVE; + ZeModuleDesc.inputSize = hProgram->getCodeSize(ZeDevice); + ZeModuleDesc.pInputModule = Code; ze_context_handle_t ZeContext = hProgram->Context->getZeHandle(); ze_module_handle_t ZeModuleHandle = nullptr; ze_module_build_log_handle_t ZeBuildLog{}; - hProgram->State = ur_program_handle_t_::Exe; ze_result_t ZeResult = ZE_CALL_NOCHECK(zeModuleCreate, (ZeContext, ZeDevice, &ZeModuleDesc, &ZeModuleHandle, &ZeBuildLog)); + hProgram->setState(ZeDevice, ur_program_handle_t_::Exe); if (ZeResult != ZE_RESULT_SUCCESS) { // We adjust ur_program below to avoid attempting to release zeModule when // RT calls urProgramRelease(). - hProgram->State = ur_program_handle_t_::Invalid; + hProgram->setState(ZeDevice, ur_program_handle_t_::Invalid); Result = ze2urResult(ZeResult); if (ZeModuleHandle) { ZE_CALL_NOCHECK(zeModuleDestroy, (ZeModuleHandle)); @@ -209,7 +221,7 @@ ur_result_t urProgramBuildExp( // Therefore, do an extra check now for unresolved symbols. ZeResult = checkUnresolvedSymbols(ZeModuleHandle, &ZeBuildLog); if (ZeResult != ZE_RESULT_SUCCESS) { - hProgram->State = ur_program_handle_t_::Invalid; + hProgram->setState(ZeDevice, ur_program_handle_t_::Invalid); Result = (ZeResult == ZE_RESULT_ERROR_MODULE_LINK_FAILURE) ? UR_RESULT_ERROR_PROGRAM_BUILD_FAILURE : ze2urResult(ZeResult); @@ -218,15 +230,11 @@ ur_result_t urProgramBuildExp( ZeModuleHandle = nullptr; } } - hProgram->ZeModuleMap.insert(std::make_pair(ZeDevice, ZeModuleHandle)); + hProgram->setZeModule(ZeDevice, ZeModuleHandle); } - hProgram->ZeBuildLogMap.insert(std::make_pair(ZeDevice, ZeBuildLog)); + hProgram->setBuildLog(ZeDevice, ZeBuildLog); } - if (!hProgram->ZeModuleMap.empty()) - hProgram->ZeModule = hProgram->ZeModuleMap.begin()->second; - if (!hProgram->ZeBuildLogMap.empty()) - hProgram->ZeBuildLog = hProgram->ZeBuildLogMap.begin()->second; return Result; } @@ -239,10 +247,38 @@ ur_result_t urProgramCompileExp( const char *pOptions ///< [in][optional] pointer to build options ///< null-terminated string. ) { - std::ignore = numDevices; - std::ignore = phDevices; - return ur::level_zero::urProgramCompile(hProgram->Context, hProgram, - pOptions); + std::scoped_lock Guard(hProgram->Mutex); + // Check that state is IL for all devices in the context and set the state to + // Object. + for (uint32_t I = 0; I < numDevices; I++) { + auto ZeDevice = phDevices[I]->ZeDevice; + // It's only valid to compile a program created from IL (we don't support + // programs created from source code). + // + // The OpenCL spec says that the header parameters are ignored when + // compiling IL programs, so we don't validate them. + if (hProgram->getState(ZeDevice) != ur_program_handle_t_::IL) + return UR_RESULT_ERROR_INVALID_OPERATION; + hProgram->setState(ZeDevice, ur_program_handle_t_::Object); + // We don't compile anything now. Instead, we delay compilation until + // urProgramLink, where we do both compilation and linking as a single step. + // This produces better code because the driver can do cross-module + // optimizations. Therefore, we just remember the compilation flags, so we + // can use them later. + if (pOptions) { + hProgram->setBuildOptions(ZeDevice, pOptions); + // if large allocations are selected, then pass + // ze-opt-greater-than-4GB-buffer-required to disable + // stateful optimizations and be able to use larger than + // 4GB allocations on these kernels. + if (phDevices[I]->useRelaxedAllocationLimits()) { + hProgram->appendBuildOptions( + ZeDevice, " -ze-opt-greater-than-4GB-buffer-required"); + } + } + hProgram->setState(ZeDevice, ur_program_handle_t_::Object); + } + return UR_RESULT_SUCCESS; } ur_result_t urProgramCompile( @@ -252,36 +288,9 @@ ur_result_t urProgramCompile( const char *Options ///< [in][optional] pointer to build options ///< null-terminated string. ) { - std::ignore = Context; - std::scoped_lock Guard(Program->Mutex); - - // It's only valid to compile a program created from IL (we don't support - // programs created from source code). - // - // The OpenCL spec says that the header parameters are ignored when compiling - // IL programs, so we don't validate them. - if (Program->State != ur_program_handle_t_::IL) - return UR_RESULT_ERROR_INVALID_OPERATION; - - // We don't compile anything now. Instead, we delay compilation until - // urProgramLink, where we do both compilation and linking as a single step. - // This produces better code because the driver can do cross-module - // optimizations. Therefore, we just remember the compilation flags, so we - // can use them later. - if (Options) { - Program->BuildFlags = Options; - - // if large allocations are selected, then pass - // ze-opt-greater-than-4GB-buffer-required to disable - // stateful optimizations and be able to use larger than - // 4GB allocations on these kernels. - if (Context->getDevices()[0]->useRelaxedAllocationLimits()) { - Program->BuildFlags += " -ze-opt-greater-than-4GB-buffer-required"; - } - } - Program->State = ur_program_handle_t_::Object; - - return UR_RESULT_SUCCESS; + auto devices = Context->getDevices(); + return ur::level_zero::urProgramCompileExp(Program, devices.size(), + devices.data(), Options); } ur_result_t urProgramLink( @@ -337,7 +346,7 @@ ur_result_t urProgramLinkExp( ur_result_t UrResult = UR_RESULT_SUCCESS; try { // Acquire a "shared" lock on each of the input programs, and also validate - // that they are all in Object state. + // that they are all in Object state for each device in the input list. // // There is no danger of deadlock here even if two threads call // urProgramLink simultaneously with the same input programs in a different @@ -351,8 +360,12 @@ ur_result_t urProgramLinkExp( for (uint32_t I = 0; I < count; I++) { std::shared_lock Guard(phPrograms[I]->Mutex); Guards[I].swap(Guard); - if (phPrograms[I]->State != ur_program_handle_t_::Object) { - return UR_RESULT_ERROR_INVALID_OPERATION; + for (uint32_t DeviceIndex = 0; DeviceIndex < numDevices; DeviceIndex++) { + auto Device = phDevices[DeviceIndex]; + if (phPrograms[I]->getState(Device->ZeDevice) != + ur_program_handle_t_::Object) { + return UR_RESULT_ERROR_INVALID_OPERATION; + } } } @@ -373,9 +386,8 @@ ur_result_t urProgramLinkExp( for (uint32_t I = 0; I < count; I++) { ur_program_handle_t Program = phPrograms[I]; - CodeSizes[I] = Program->CodeLength; - CodeBufs[I] = Program->Code.get(); - BuildFlagPtrs[I] = Program->BuildFlags.c_str(); + CodeSizes[I] = Program->getCodeSize(); + CodeBufs[I] = Program->getCode(); SpecConstShims.emplace_back(Program); SpecConstPtrs[I] = SpecConstShims[I].ze(); } @@ -383,7 +395,6 @@ ur_result_t urProgramLinkExp( ZeExtModuleDesc.count = count; ZeExtModuleDesc.inputSizes = CodeSizes.data(); ZeExtModuleDesc.pInputModules = CodeBufs.data(); - ZeExtModuleDesc.pBuildFlags = BuildFlagPtrs.data(); ZeExtModuleDesc.pConstants = SpecConstPtrs.data(); ZeStruct ZeModuleDesc; @@ -419,7 +430,6 @@ ur_result_t urProgramLinkExp( ZeModuleDesc.pNext = nullptr; ZeModuleDesc.inputSize = ZeExtModuleDesc.inputSizes[0]; ZeModuleDesc.pInputModule = ZeExtModuleDesc.pInputModules[0]; - ZeModuleDesc.pBuildFlags = ZeExtModuleDesc.pBuildFlags[0]; ZeModuleDesc.pConstants = ZeExtModuleDesc.pConstants[0]; } else { logger::error( @@ -428,10 +438,9 @@ ur_result_t urProgramLinkExp( return UR_RESULT_ERROR_INVALID_VALUE; } } - std::unordered_map ZeModuleMap; - std::unordered_map - ZeBuildLogMap; + ur_program_handle_t_ *UrProgram = new ur_program_handle_t_(hContext); + *phProgram = reinterpret_cast(UrProgram); for (uint32_t i = 0; i < numDevices; i++) { // Call the Level Zero API to compile, link, and create the module. @@ -439,6 +448,18 @@ ur_result_t urProgramLinkExp( ze_context_handle_t ZeContext = hContext->getZeHandle(); ze_module_handle_t ZeModule = nullptr; ze_module_build_log_handle_t ZeBuildLog = nullptr; + + // Build flags may be different for different devices, so handle them + // here. Clear values of the previous device first. + BuildFlagPtrs.clear(); + for (uint32_t I = 0; I < count; I++) { + BuildFlagPtrs.push_back( + phPrograms[I]->getBuildOptions(ZeDevice).c_str()); + } + ZeExtModuleDesc.pBuildFlags = BuildFlagPtrs.data(); + if (count == 1) + ZeModuleDesc.pBuildFlags = ZeExtModuleDesc.pBuildFlags[0]; + ze_result_t ZeResult = ZE_CALL_NOCHECK(zeModuleCreate, (ZeContext, ZeDevice, &ZeModuleDesc, &ZeModule, &ZeBuildLog)); @@ -465,19 +486,12 @@ ur_result_t urProgramLinkExp( return ze2urResult(ZeResult); } } - ZeModuleMap.insert(std::make_pair(ZeDevice, ZeModule)); - ZeBuildLogMap.insert(std::make_pair(ZeDevice, ZeBuildLog)); + UrProgram->setZeModule(ZeDevice, ZeModule); + UrProgram->setBuildLog(ZeDevice, ZeBuildLog); + UrProgram->setState(ZeDevice, (UrResult == UR_RESULT_SUCCESS) + ? ur_program_handle_t_::Exe + : ur_program_handle_t_::Invalid); } - - ur_program_handle_t_::state State = (UrResult == UR_RESULT_SUCCESS) - ? ur_program_handle_t_::Exe - : ur_program_handle_t_::Invalid; - ur_program_handle_t_ *UrProgram = - new ur_program_handle_t_(State, hContext, ZeModuleMap.begin()->second, - ZeBuildLogMap.begin()->second); - *phProgram = reinterpret_cast(UrProgram); - (*phProgram)->ZeModuleMap = std::move(ZeModuleMap); - (*phProgram)->ZeBuildLogMap = std::move(ZeBuildLogMap); } catch (const std::bad_alloc &) { return UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; } catch (...) { @@ -544,17 +558,11 @@ ur_result_t urProgramGetFunctionPointer( ///< it is found in the program. ) { std::shared_lock Guard(Program->Mutex); - if (Program->State != ur_program_handle_t_::Exe) { + if (Program->getState(Device->ZeDevice) != ur_program_handle_t_::Exe) { return UR_RESULT_ERROR_INVALID_PROGRAM_EXECUTABLE; } - ze_module_handle_t ZeModule{}; - auto It = Program->ZeModuleMap.find(Device->ZeDevice); - if (It != Program->ZeModuleMap.end()) { - ZeModule = It->second; - } else { - ZeModule = Program->ZeModule; - } + ze_module_handle_t ZeModule = Program->getZeModuleHandle(Device->ZeDevice); ze_result_t ZeResult = ZE_CALL_NOCHECK( zeModuleGetFunctionPointer, (ZeModule, FunctionName, FunctionPointerRet)); @@ -612,13 +620,7 @@ ur_result_t urProgramGetGlobalVariablePointer( std::scoped_lock lock(Program->Mutex); ze_module_handle_t ZeModuleEntry{}; - ZeModuleEntry = Program->ZeModule; - if (!Program->ZeModuleMap.empty()) { - auto It = Program->ZeModuleMap.find(Device->ZeDevice); - if (It != Program->ZeModuleMap.end()) { - ZeModuleEntry = It->second; - } - } + ZeModuleEntry = Program->getZeModuleHandle(Device->ZeDevice); ze_result_t ZeResult = zeModuleGetGlobalPointer(ZeModuleEntry, GlobalVariableName, @@ -652,55 +654,39 @@ ur_result_t urProgramGetInfo( case UR_PROGRAM_INFO_CONTEXT: return ReturnValue(Program->Context); case UR_PROGRAM_INFO_NUM_DEVICES: - if (!Program->ZeModuleMap.empty()) - return ReturnValue( - uint32_t{ur_cast(Program->ZeModuleMap.size())}); - else - return ReturnValue(uint32_t{1}); + return ReturnValue( + uint32_t{ur_cast(Program->AssociatedDevices.size())}); case UR_PROGRAM_INFO_DEVICES: - if (!Program->ZeModuleMap.empty()) { - std::vector devices; - for (auto &ZeModulePair : Program->ZeModuleMap) { - auto It = Program->ZeModuleMap.find(ZeModulePair.first); - if (It != Program->ZeModuleMap.end()) { - for (auto &Device : Program->Context->getDevices()) { - if (Device->ZeDevice == ZeModulePair.first) { - devices.push_back(Device); - } - } - } - } - return ReturnValue(devices.data(), devices.size()); - } else { - return ReturnValue(Program->Context->getDevices()[0]); - } + return ReturnValue(Program->AssociatedDevices.data(), + Program->AssociatedDevices.size()); case UR_PROGRAM_INFO_BINARY_SIZES: { std::shared_lock Guard(Program->Mutex); - size_t SzBinary; - if (Program->State == ur_program_handle_t_::IL || - Program->State == ur_program_handle_t_::Native || - Program->State == ur_program_handle_t_::Object) { - SzBinary = Program->CodeLength; - } else if (Program->State == ur_program_handle_t_::Exe) { - if (!Program->ZeModuleMap.empty()) { - std::vector binarySizes; - for (auto &ZeModulePair : Program->ZeModuleMap) { - size_t binarySize = 0; - ZE2UR_CALL(zeModuleGetNativeBinary, - (ZeModulePair.second, &binarySize, nullptr)); - binarySizes.push_back(binarySize); - } - return ReturnValue(binarySizes.data(), binarySizes.size()); + std::vector binarySizes; + for (auto Device : Program->AssociatedDevices) { + auto State = Program->getState(Device->ZeDevice); + if (State == ur_program_handle_t_::Native) { + binarySizes.push_back(Program->getCodeSize(Device->ZeDevice)); + continue; + } + auto ZeModule = Program->getZeModuleHandle(Device->ZeDevice); + if (!ZeModule) + return UR_RESULT_ERROR_INVALID_PROGRAM; + + if (State == ur_program_handle_t_::IL || + State == ur_program_handle_t_::Object) { + // We don't have a binary for this device, so return size of the spirv + // code. This is an array of 1 element, initialized as if it were + // scalar. + return ReturnValue(size_t{Program->getCodeSize()}); + } else if (State == ur_program_handle_t_::Exe) { + size_t binarySize = 0; + ZE2UR_CALL(zeModuleGetNativeBinary, (ZeModule, &binarySize, nullptr)); + binarySizes.push_back(binarySize); } else { - ZE2UR_CALL(zeModuleGetNativeBinary, - (Program->ZeModule, &SzBinary, nullptr)); - return ReturnValue(SzBinary); + return UR_RESULT_ERROR_INVALID_PROGRAM; } - } else { - return UR_RESULT_ERROR_INVALID_PROGRAM; } - // This is an array of 1 element, initialized as if it were scalar. - return ReturnValue(size_t{SzBinary}); + return ReturnValue(binarySizes.data(), binarySizes.size()); } case UR_PROGRAM_INFO_BINARIES: { // The caller sets "ParamValue" to an array of pointers, one for each @@ -713,100 +699,108 @@ ur_result_t urProgramGetInfo( } } std::shared_lock Guard(Program->Mutex); - // If the caller is using a Program which is IL, Native or an object, then - // the program has not been built for multiple devices so a single IL is - // returned. - if (Program->State == ur_program_handle_t_::IL || - Program->State == ur_program_handle_t_::Native || - Program->State == ur_program_handle_t_::Object) { - if (PropSizeRet) - *PropSizeRet = Program->CodeLength; - if (PBinary) { - std::memcpy(PBinary[0], Program->Code.get(), Program->CodeLength); + uint8_t *NativeBinaryPtr = nullptr; + if (PBinary) { + NativeBinaryPtr = PBinary[0]; + } + + size_t SzBinary = 0; + for (uint32_t deviceIndex = 0; + deviceIndex < Program->AssociatedDevices.size(); deviceIndex++) { + auto ZeDevice = Program->AssociatedDevices[deviceIndex]->ZeDevice; + auto State = Program->getState(ZeDevice); + if (State == ur_program_handle_t_::Native) { + // If Program was created from Native code then return that code. + if (PBinary) { + std::memcpy(PBinary[deviceIndex], Program->getCode(ZeDevice), + Program->getCodeSize(ZeDevice)); + } + SzBinary += Program->getCodeSize(ZeDevice); + continue; } - } else if (Program->State == ur_program_handle_t_::Exe) { - // If the caller is using a Program which is a built binary, then - // the program returned will either be a single module if this is a native - // binary or the native binary for each device will be returned. - size_t SzBinary = 0; - uint8_t *NativeBinaryPtr = nullptr; - if (PBinary) { - NativeBinaryPtr = PBinary[0]; + auto ZeModule = Program->getZeModuleHandle(ZeDevice); + if (!ZeModule) { + return UR_RESULT_ERROR_INVALID_PROGRAM; } - if (!Program->ZeModuleMap.empty()) { - uint32_t deviceIndex = 0; - for (auto &ZeDeviceModule : Program->ZeModuleMap) { - size_t binarySize = 0; - if (PBinary) { - NativeBinaryPtr = PBinary[deviceIndex++]; - } - ZE2UR_CALL(zeModuleGetNativeBinary, - (ZeDeviceModule.second, &binarySize, NativeBinaryPtr)); - SzBinary += binarySize; + // If the caller is using a Program which is IL or an object, then + // the program has not been built for multiple devices so a single IL is + // returned. + // TODO: currently if program is not compiled for any of the associated + // devices, we just return spirv code, assuming that we either have the + // program built for all associated devices or for none. It is possible + // that program is compiled for subset of associated devices, so that case + // probably should be explicitely specified and handled better. + if (State == ur_program_handle_t_::IL || + State == ur_program_handle_t_::Object) { + if (PropSizeRet) + *PropSizeRet = Program->getCodeSize(); + if (PBinary) { + std::memcpy(PBinary[0], Program->getCode(), Program->getCodeSize()); } - } else { + break; + } else if (State == ur_program_handle_t_::Exe) { + size_t binarySize = 0; + if (PBinary) { + NativeBinaryPtr = PBinary[deviceIndex]; + } + // If the caller is using a Program which is a built binary, then + // the program returned will either be a single module if this is a + // native binary or the native binary for each device will be returned. ZE2UR_CALL(zeModuleGetNativeBinary, - (Program->ZeModule, &SzBinary, NativeBinaryPtr)); + (ZeModule, &binarySize, NativeBinaryPtr)); + SzBinary += binarySize; + } else { + return UR_RESULT_ERROR_INVALID_PROGRAM; } - if (PropSizeRet) - *PropSizeRet = SzBinary; - } else { - return UR_RESULT_ERROR_INVALID_PROGRAM; } + if (PropSizeRet) + *PropSizeRet = SzBinary; break; } case UR_PROGRAM_INFO_NUM_KERNELS: { std::shared_lock Guard(Program->Mutex); uint32_t NumKernels = 0; - if (Program->State == ur_program_handle_t_::IL || - Program->State == ur_program_handle_t_::Native || - Program->State == ur_program_handle_t_::Object) { - return UR_RESULT_ERROR_INVALID_PROGRAM_EXECUTABLE; - } else if (Program->State == ur_program_handle_t_::Exe) { - if (!Program->ZeModuleMap.empty()) { - ZE2UR_CALL( - zeModuleGetKernelNames, - (Program->ZeModuleMap.begin()->second, &NumKernels, nullptr)); - } else { - ZE2UR_CALL(zeModuleGetKernelNames, - (Program->ZeModule, &NumKernels, nullptr)); + ze_module_handle_t ZeModule = nullptr; + // Find the first module in exe state. + for (const auto &Device : Program->AssociatedDevices) { + if (Program->getState(Device->ZeDevice) == ur_program_handle_t_::Exe) { + ZeModule = Program->getZeModuleHandle(Device->ZeDevice); + break; } - } else { - return UR_RESULT_ERROR_INVALID_PROGRAM; } + + // If none of the modules are in exe state, return error. + if (!ZeModule) + return UR_RESULT_ERROR_INVALID_PROGRAM; + + ZE2UR_CALL(zeModuleGetKernelNames, (ZeModule, &NumKernels, nullptr)); return ReturnValue(size_t{NumKernels}); } case UR_PROGRAM_INFO_KERNEL_NAMES: try { std::shared_lock Guard(Program->Mutex); - std::string PINames{""}; - if (Program->State == ur_program_handle_t_::IL || - Program->State == ur_program_handle_t_::Native || - Program->State == ur_program_handle_t_::Object) { - return UR_RESULT_ERROR_INVALID_PROGRAM_EXECUTABLE; - } else if (Program->State == ur_program_handle_t_::Exe) { - uint32_t Count = 0; - std::unique_ptr PNames; - if (!Program->ZeModuleMap.empty()) { - ZE2UR_CALL(zeModuleGetKernelNames, - (Program->ZeModuleMap.begin()->second, &Count, nullptr)); - PNames = std::make_unique(Count); - ZE2UR_CALL( - zeModuleGetKernelNames, - (Program->ZeModuleMap.begin()->second, &Count, PNames.get())); - } else { - ZE2UR_CALL(zeModuleGetKernelNames, - (Program->ZeModule, &Count, nullptr)); - PNames = std::make_unique(Count); - ZE2UR_CALL(zeModuleGetKernelNames, - (Program->ZeModule, &Count, PNames.get())); - } - for (uint32_t I = 0; I < Count; ++I) { - PINames += (I > 0 ? ";" : ""); - PINames += PNames[I]; + ze_module_handle_t ZeModule = nullptr; + // Find the first module in exe state. + for (const auto &Device : Program->AssociatedDevices) { + if (Program->getState(Device->ZeDevice) == ur_program_handle_t_::Exe) { + ZeModule = Program->getZeModuleHandle(Device->ZeDevice); + break; } - } else { + } + + // If none of the modules are in exe state, return error. + if (!ZeModule) return UR_RESULT_ERROR_INVALID_PROGRAM; + + std::string PINames{""}; + uint32_t Count = 0; + std::unique_ptr PNames; + ZE2UR_CALL(zeModuleGetKernelNames, (ZeModule, &Count, nullptr)); + PNames = std::make_unique(Count); + ZE2UR_CALL(zeModuleGetKernelNames, (ZeModule, &Count, PNames.get())); + for (uint32_t I = 0; I < Count; ++I) { + PINames += (I > 0 ? ";" : ""); + PINames += PNames[I]; } return ReturnValue(PINames.c_str()); } catch (const std::bad_alloc &) { @@ -815,7 +809,7 @@ ur_result_t urProgramGetInfo( return UR_RESULT_ERROR_UNKNOWN; } case UR_PROGRAM_INFO_IL: - return ReturnValue(Program->Code.get(), Program->CodeLength); + return ReturnValue(Program->getCode(), Program->getCodeSize()); default: return UR_RESULT_ERROR_INVALID_ENUMERATION; } @@ -843,11 +837,13 @@ ur_result_t urProgramGetBuildInfo( UrReturnHelper ReturnValue(PropSize, PropValue, PropSizeRet); if (PropName == UR_PROGRAM_BUILD_INFO_BINARY_TYPE) { ur_program_binary_type_t Type = UR_PROGRAM_BINARY_TYPE_NONE; - if (Program->State == ur_program_handle_t_::Object) { + auto State = Program->getState(Device->ZeDevice); + if (State == ur_program_handle_t_::Object) { Type = UR_PROGRAM_BINARY_TYPE_COMPILED_OBJECT; - } else if (Program->State == ur_program_handle_t_::Exe) { + } else if (State == ur_program_handle_t_::Exe) { Type = UR_PROGRAM_BINARY_TYPE_EXECUTABLE; } + return ReturnValue(ur_program_binary_type_t{Type}); } if (PropName == UR_PROGRAM_BUILD_INFO_OPTIONS) { @@ -866,10 +862,8 @@ ur_result_t urProgramGetBuildInfo( } // Next check if there is a Level Zero build log. - if (Program->ZeBuildLogMap.find(Device->ZeDevice) != - Program->ZeBuildLogMap.end()) { - ze_module_build_log_handle_t ZeBuildLog = - Program->ZeBuildLogMap.begin()->second; + auto ZeBuildLog = Program->getBuildLog(Device->ZeDevice); + if (ZeBuildLog) { size_t LogSize = PropSize; ZE2UR_CALL(zeModuleBuildLogGetString, (ZeBuildLog, &LogSize, ur_cast(PropValue))); @@ -883,10 +877,10 @@ ur_result_t urProgramGetBuildInfo( // the failed build log here because RT does not create sycl::program // when urProgramBuild() fails, thus it won't call urProgramRelease() // to clean up the build log. - if (Program->State == ur_program_handle_t_::Invalid) { + if (Program->getState(Device->ZeDevice) == + ur_program_handle_t_::Invalid) { ZE_CALL_NOCHECK(zeModuleBuildLogDestroy, (ZeBuildLog)); - Program->ZeBuildLogMap.erase(Device->ZeDevice); - ZeBuildLog = nullptr; + Program->setBuildLog(Device->ZeDevice, nullptr); } } return UR_RESULT_SUCCESS; @@ -926,16 +920,20 @@ ur_result_t urProgramGetNativeHandle( auto ZeModule = ur_cast(NativeProgram); std::shared_lock Guard(Program->Mutex); - switch (Program->State) { - case ur_program_handle_t_::Exe: { - *ZeModule = Program->ZeModule; - break; + assert(Program->AssociatedDevices.size() > 0); + // Current API doesn't allow to specify device for which we want to get the + // native handle. So, find the first device with a valid module handle. + ze_module_handle_t Module = nullptr; + for (const auto &Device : Program->AssociatedDevices) { + Module = Program->getZeModuleHandle(Device->ZeDevice); + if (Module) { + break; + } } - - default: + if (!Module) return UR_RESULT_ERROR_INVALID_OPERATION; - } + *ZeModule = Module; return UR_RESULT_SUCCESS; } @@ -949,7 +947,6 @@ ur_result_t urProgramCreateWithNativeHandle( ur_program_handle_t *Program ///< [out] pointer to the handle of the ///< program object created. ) { - std::ignore = Properties; UR_ASSERT(Context && NativeProgram, UR_RESULT_ERROR_INVALID_NULL_HANDLE); UR_ASSERT(Program, UR_RESULT_ERROR_INVALID_NULL_POINTER); auto ZeModule = ur_cast(NativeProgram); @@ -959,9 +956,9 @@ ur_result_t urProgramCreateWithNativeHandle( // executable (state Object). try { - ur_program_handle_t_ *UrProgram = - new ur_program_handle_t_(ur_program_handle_t_::Exe, Context, ZeModule, - Properties->isNativeHandleOwned); + ur_program_handle_t_ *UrProgram = new ur_program_handle_t_( + ur_program_handle_t_::Exe, Context, ZeModule, + Properties ? Properties->isNativeHandleOwned : false); *Program = reinterpret_cast(UrProgram); } catch (const std::bad_alloc &) { return UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; @@ -995,6 +992,67 @@ ur_result_t urProgramSetSpecializationConstants( } // namespace ur::level_zero +ur_program_handle_t_::ur_program_handle_t_(state St, + ur_context_handle_t Context, + const void *Input, size_t Length) + : Context{Context}, NativeProperties{nullptr}, OwnZeModule{true}, + AssociatedDevices(Context->getDevices()), SpirvCode{new uint8_t[Length]}, + SpirvCodeLength{Length} { + std::memcpy(SpirvCode.get(), Input, Length); + // All devices have the program in IL state. + for (auto &Device : Context->getDevices()) { + DeviceData &PerDevData = DeviceDataMap[Device->ZeDevice]; + PerDevData.State = St; + } +} + +ur_program_handle_t_::ur_program_handle_t_( + state St, ur_context_handle_t Context, const uint32_t NumDevices, + const ur_device_handle_t *Devices, + const ur_program_properties_t *Properties, const uint8_t **Inputs, + const size_t *Lengths) + : Context{Context}, NativeProperties(Properties), OwnZeModule{true}, + AssociatedDevices(Devices, Devices + NumDevices) { + for (uint32_t I = 0; I < NumDevices; ++I) { + DeviceData &PerDevData = DeviceDataMap[Devices[I]->ZeDevice]; + PerDevData.State = St; + PerDevData.Binary = std::make_pair( + std::unique_ptr(new uint8_t[Lengths[I]]), Lengths[I]); + std::memcpy(PerDevData.Binary.first.get(), Inputs[I], Lengths[I]); + } +} + +ur_program_handle_t_::ur_program_handle_t_(ur_context_handle_t Context) + : Context{Context}, NativeProperties{nullptr}, OwnZeModule{true}, + AssociatedDevices(Context->getDevices()) {} + +ur_program_handle_t_::ur_program_handle_t_(state, ur_context_handle_t Context, + ze_module_handle_t InteropZeModule) + : Context{Context}, NativeProperties{nullptr}, OwnZeModule{true}, + AssociatedDevices({Context->getDevices()[0]}), InteropZeModule{ + InteropZeModule} {} + +ur_program_handle_t_::ur_program_handle_t_(state, ur_context_handle_t Context, + ze_module_handle_t InteropZeModule, + bool OwnZeModule) + : Context{Context}, NativeProperties{nullptr}, OwnZeModule{OwnZeModule}, + AssociatedDevices({Context->getDevices()[0]}), InteropZeModule{ + InteropZeModule} { + // TODO: Currently it is not possible to understand the device associated + // with provided ZeModule. So we can't set the state on that device to Exe. +} + +ur_program_handle_t_::ur_program_handle_t_(state St, + ur_context_handle_t Context, + const std::string &ErrorMessage) + : Context{Context}, NativeProperties{nullptr}, OwnZeModule{true}, + ErrorMessage{ErrorMessage}, AssociatedDevices(Context->getDevices()) { + for (auto &Device : Context->getDevices()) { + DeviceData &PerDevData = DeviceDataMap[Device->ZeDevice]; + PerDevData.State = St; + } +} + ur_program_handle_t_::~ur_program_handle_t_() { if (!resourcesReleased) { ur_release_program_resources(true); @@ -1011,21 +1069,21 @@ void ur_program_handle_t_::ur_release_program_resources(bool deletion) { } } if (!resourcesReleased) { - for (auto &ZeBuildLogPair : this->ZeBuildLogMap) { - ZE_CALL_NOCHECK(zeModuleBuildLogDestroy, (ZeBuildLogPair.second)); + for (auto &[ZeDevice, DeviceData] : this->DeviceDataMap) { + if (DeviceData.ZeBuildLog) + ZE_CALL_NOCHECK(zeModuleBuildLogDestroy, (DeviceData.ZeBuildLog)); } - if (ZeModule && OwnZeModule) { - if (ZeModuleMap.empty()) { - // interop api - ZE_CALL_NOCHECK(zeModuleDestroy, (ZeModule)); - } else { - for (auto &ZeModulePair : this->ZeModuleMap) { - ZE_CALL_NOCHECK(zeModuleDestroy, (ZeModulePair.second)); - } - this->ZeModuleMap.clear(); - } - } + // interop api + if (InteropZeModule && OwnZeModule) + ZE_CALL_NOCHECK(zeModuleDestroy, (InteropZeModule)); + + for (auto &[ZeDevice, DeviceData] : this->DeviceDataMap) + if (DeviceData.ZeModule) + ZE_CALL_NOCHECK(zeModuleDestroy, (DeviceData.ZeModule)); + + this->DeviceDataMap.clear(); + resourcesReleased = true; } } diff --git a/source/adapters/level_zero/program.hpp b/source/adapters/level_zero/program.hpp index 42330adcbf..4fe8c24acd 100644 --- a/source/adapters/level_zero/program.hpp +++ b/source/adapters/level_zero/program.hpp @@ -10,6 +10,7 @@ #pragma once #include "common.hpp" +#include "device.hpp" struct ur_program_handle_t_ : _ur_object { // ur_program_handle_t_() {} @@ -67,64 +68,117 @@ struct ur_program_handle_t_ : _ur_object { // Construct a program in IL. ur_program_handle_t_(state St, ur_context_handle_t Context, const void *Input, - size_t Length) - : Context{Context}, NativeDevice{nullptr}, NativeProperties{nullptr}, - OwnZeModule{true}, State{St}, Code{new uint8_t[Length]}, - CodeLength{Length}, ZeModule{nullptr}, ZeBuildLog{nullptr} { - std::memcpy(Code.get(), Input, Length); - } + size_t Length); - // Construct a program in NATIVE. + // Construct a program in NATIVE for multiple devices. ur_program_handle_t_(state St, ur_context_handle_t Context, - ur_device_handle_t Device, + const uint32_t NumDevices, + const ur_device_handle_t *Devices, const ur_program_properties_t *Properties, - const void *Input, size_t Length) - : Context{Context}, NativeDevice(Device), NativeProperties(Properties), - OwnZeModule{true}, State{St}, Code{new uint8_t[Length]}, - CodeLength{Length}, ZeModule{nullptr}, ZeBuildLog{nullptr} { - std::memcpy(Code.get(), Input, Length); - } + const uint8_t **Inputs, const size_t *Lengths); + + ur_program_handle_t_(ur_context_handle_t Context); // Construct a program in Exe or Invalid state. - ur_program_handle_t_(state St, ur_context_handle_t Context, - ze_module_handle_t ZeModule, - ze_module_build_log_handle_t ZeBuildLog) - : Context{Context}, NativeDevice{nullptr}, NativeProperties{nullptr}, - OwnZeModule{true}, State{St}, ZeModule{ZeModule}, ZeBuildLog{ - ZeBuildLog} {} + ur_program_handle_t_(state, ur_context_handle_t Context, + ze_module_handle_t InteropZeModule); // Construct a program in Exe state (interop). - ur_program_handle_t_(state St, ur_context_handle_t Context, - ze_module_handle_t ZeModule, bool OwnZeModule) - : Context{Context}, NativeDevice{nullptr}, NativeProperties{nullptr}, - OwnZeModule{OwnZeModule}, State{St}, ZeModule{ZeModule}, ZeBuildLog{ - nullptr} {} - - // Construct a program from native handle - ur_program_handle_t_(state St, ur_context_handle_t Context, - ze_module_handle_t ZeModule) - : Context{Context}, NativeDevice{nullptr}, NativeProperties{nullptr}, - OwnZeModule{true}, State{St}, ZeModule{ZeModule}, ZeBuildLog{nullptr} {} + // TODO: Currently it is not possible to get the device associated with the + // interop module, API must be changed to either get that info from the user + // or new API need to be added to L0 to fetch that info. Consider it + // associated with the first device in the context. + ur_program_handle_t_(state, ur_context_handle_t Context, + ze_module_handle_t InteropZeModule, bool OwnZeModule); // Construct a program in Invalid state with a custom error message. ur_program_handle_t_(state St, ur_context_handle_t Context, - const std::string &ErrorMessage) - : Context{Context}, NativeDevice{nullptr}, NativeProperties{nullptr}, - OwnZeModule{true}, ErrorMessage{ErrorMessage}, State{St}, - ZeModule{nullptr}, ZeBuildLog{nullptr} {} + const std::string &ErrorMessage); ~ur_program_handle_t_(); void ur_release_program_resources(bool deletion); + state getState(ze_device_handle_t ZeDevice) { + if ((DeviceDataMap.find(ZeDevice) == DeviceDataMap.end()) && + InteropZeModule) + return state::Exe; + + return DeviceDataMap[ZeDevice].State; + } + + ze_module_handle_t getZeModuleHandle(ze_device_handle_t ZeDevice) { + if (DeviceDataMap.find(ZeDevice) == DeviceDataMap.end()) + return InteropZeModule; + + return DeviceDataMap[ZeDevice].ZeModule; + } + + uint8_t *getCode(ze_device_handle_t ZeDevice = nullptr) { + if (!ZeDevice) + return SpirvCode.get(); + + if (DeviceDataMap.find(ZeDevice) == DeviceDataMap.end()) + return nullptr; + + if (DeviceDataMap[ZeDevice].State == state::IL) + return SpirvCode.get(); + else + return DeviceDataMap[ZeDevice].Binary.first.get(); + } + + size_t getCodeSize(ze_device_handle_t ZeDevice = nullptr) { + if (ZeDevice == nullptr) + return SpirvCodeLength; + + if (DeviceDataMap.find(ZeDevice) == DeviceDataMap.end()) + return 0; + + if (DeviceDataMap[ZeDevice].State == state::IL) + return SpirvCodeLength; + else + return DeviceDataMap[ZeDevice].Binary.second; + } + + ze_module_build_log_handle_t getBuildLog(ze_device_handle_t ZeDevice) { + if (DeviceDataMap.find(ZeDevice) == DeviceDataMap.end()) + return nullptr; + + return DeviceDataMap[ZeDevice].ZeBuildLog; + } + + void setState(ze_device_handle_t ZeDevice, state NewState) { + DeviceDataMap[ZeDevice].State = NewState; + } + + void setZeModule(ze_device_handle_t ZeDevice, ze_module_handle_t ZeModule) { + DeviceDataMap[ZeDevice].ZeModule = ZeModule; + } + + void setBuildLog(ze_device_handle_t ZeDevice, + ze_module_build_log_handle_t ZeBuildLog) { + DeviceDataMap[ZeDevice].ZeBuildLog = ZeBuildLog; + } + + void setBuildOptions(ze_device_handle_t ZeDevice, + const std::string &Options) { + DeviceDataMap[ZeDevice].BuildFlags = Options; + } + + void appendBuildOptions(ze_device_handle_t ZeDevice, + const std::string &Options) { + DeviceDataMap[ZeDevice].BuildFlags += Options; + } + + std::string getBuildOptions(ze_device_handle_t ZeDevice) { + return DeviceDataMap[ZeDevice].BuildFlags; + } + // Tracks the release state of the program handle to determine if the // internal handle needs to be released. bool resourcesReleased = false; const ur_context_handle_t Context; // Context of the program. - // Device Handle used for the Native Build - ur_device_handle_t NativeDevice; - // Properties used for the Native Build const ur_program_properties_t *NativeProperties; @@ -136,35 +190,57 @@ struct ur_program_handle_t_ : _ur_object { // message from a call to urProgramLink. const std::string ErrorMessage; - state State; - - // In IL and Object states, this contains the SPIR-V representation of the - // module. In Native state, it contains the native code. - std::unique_ptr Code; // Array containing raw IL / native code. - size_t CodeLength{0}; // Size (bytes) of the array. - // Used only in IL and Object states. Contains the SPIR-V specialization // constants as a map from the SPIR-V "SpecID" to a buffer that contains the // associated value. The caller of the PI layer is responsible for // maintaining the storage of this buffer. std::unordered_map SpecConstants; - // Used only in Object state. Contains the build flags from the last call to - // urProgramCompile(). - std::string BuildFlags; - - // The Level Zero module handle. Used primarily in Exe state. - ze_module_handle_t ZeModule{}; - - // Map of L0 Modules created for all the devices for which a UR Program - // has been built. - std::unordered_map ZeModuleMap; + // Keep the vector of devices associated with the program. + // It is populated at program creation and used to provide information for the + // descriptors like UR_PROGRAM_INFO_DEVICES, UR_PROGRAM_INFO_BINARY_SIZES, + // UR_PROGRAM_INFO_BINARIES as they are supposed to return information in the + // same order. I.e. the first binary in the array returned by + // UR_PROGRAM_INFO_BINARIES is supposed to be associated with the first device + // in the returned array of devices for UR_PROGRAM_INFO_DEVICES. Same for + // UR_PROGRAM_INFO_BINARY_SIZES. + const std::vector AssociatedDevices; + +private: + struct DeviceData { + // Log from the result of building the program for the device using + // zeModuleCreate(). + ze_module_build_log_handle_t ZeBuildLog = nullptr; + + // The Level Zero module handle for the device. Used primarily in Exe state. + ze_module_handle_t ZeModule = nullptr; + + // In Native state, contains the pair of the binary code for the device and + // its length in bytes. + std::pair, size_t> Binary{nullptr, 0}; + + // Build flags used for building the program for the device. + // May be different for different devices, for example, if + // urProgramCompileExp was called multiple times with different build flags + // for different devices. + std::string BuildFlags{}; + + // State of the program for the device. + state State{}; + }; - // The Level Zero build log from the last call to zeModuleCreate(). - ze_module_build_log_handle_t ZeBuildLog{}; + std::unordered_map DeviceDataMap; - // Map of L0 Module Build logs created for all the devices for which a UR - // Program has been built. - std::unordered_map - ZeBuildLogMap; + // In IL and Object states, this contains the SPIR-V representation of the + // module. + std::unique_ptr SpirvCode; // Array containing raw IL code. + size_t SpirvCodeLength = 0; // Size (bytes) of the array. + + // The Level Zero module handle for interoperability. + // This module handle is either initialized with the handle provided to + // interoperability UR API, or with one of the handles after building the + // program. This handle is returned by UR API which allows to get the native + // handle from the program. + // TODO: Currently interoparability UR API does not support multiple devices. + ze_module_handle_t InteropZeModule = nullptr; }; diff --git a/source/adapters/level_zero/ur_interface_loader.hpp b/source/adapters/level_zero/ur_interface_loader.hpp index 540eab7292..1207f7776b 100644 --- a/source/adapters/level_zero/ur_interface_loader.hpp +++ b/source/adapters/level_zero/ur_interface_loader.hpp @@ -187,9 +187,9 @@ ur_result_t urProgramCreateWithIL(ur_context_handle_t hContext, const void *pIL, const ur_program_properties_t *pProperties, ur_program_handle_t *phProgram); ur_result_t urProgramCreateWithBinary( - ur_context_handle_t hContext, ur_device_handle_t hDevice, size_t size, - const uint8_t *pBinary, const ur_program_properties_t *pProperties, - ur_program_handle_t *phProgram); + ur_context_handle_t hContext, uint32_t numDevices, + ur_device_handle_t *phDevices, size_t *pLengths, const uint8_t **ppBinaries, + const ur_program_properties_t *pProperties, ur_program_handle_t *phProgram); ur_result_t urProgramBuild(ur_context_handle_t hContext, ur_program_handle_t hProgram, const char *pOptions); ur_result_t urProgramCompile(ur_context_handle_t hContext, diff --git a/source/adapters/level_zero/v2/kernel.cpp b/source/adapters/level_zero/v2/kernel.cpp index e98221b9e5..26f3123c29 100644 --- a/source/adapters/level_zero/v2/kernel.cpp +++ b/source/adapters/level_zero/v2/kernel.cpp @@ -40,7 +40,14 @@ ur_kernel_handle_t_::ur_kernel_handle_t_(ur_program_handle_t hProgram, deviceKernels(hProgram->Context->getPlatform()->getNumDevices()) { ur::level_zero::urProgramRetain(hProgram); - for (auto [zeDevice, zeModule] : hProgram->ZeModuleMap) { + for (auto &Dev : hProgram->AssociatedDevices) { + auto zeDevice = Dev->ZeDevice; + // Program may be associated with all devices from the context but built + // only for subset of devices. + if (hProgram->getState(zeDevice) != ur_program_handle_t_::state::Exe) + continue; + + auto zeModule = hProgram->getZeModuleHandle(zeDevice); ZeStruct zeKernelDesc; zeKernelDesc.pKernelName = kernelName; diff --git a/source/adapters/level_zero/v2/queue_immediate_in_order.cpp b/source/adapters/level_zero/v2/queue_immediate_in_order.cpp index b68af85033..dfb820d39a 100644 --- a/source/adapters/level_zero/v2/queue_immediate_in_order.cpp +++ b/source/adapters/level_zero/v2/queue_immediate_in_order.cpp @@ -847,15 +847,9 @@ ur_result_t ur_queue_immediate_in_order_t::enqueueDeviceGlobalVariableWrite( ur_program_handle_t hProgram, const char *name, bool blockingWrite, size_t count, size_t offset, const void *pSrc, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - // TODO: implement program->getZeModuleMap() to be sure that - // it's thread-safe - ze_module_handle_t zeModule{}; - auto It = hProgram->ZeModuleMap.find(this->hDevice->ZeDevice); - if (It != hProgram->ZeModuleMap.end()) { - zeModule = It->second; - } else { - zeModule = hProgram->ZeModule; - } + // TODO: make getZeModuleHandle thread-safe + ze_module_handle_t zeModule = + hProgram->getZeModuleHandle(this->hDevice->ZeDevice); // Find global variable pointer auto globalVarPtr = getGlobalPointerFromModule(zeModule, offset, count, name); @@ -869,15 +863,9 @@ ur_result_t ur_queue_immediate_in_order_t::enqueueDeviceGlobalVariableRead( ur_program_handle_t hProgram, const char *name, bool blockingRead, size_t count, size_t offset, void *pDst, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - // TODO: implement program->getZeModule() to be sure that - // it's thread-safe - ze_module_handle_t zeModule{}; - auto It = hProgram->ZeModuleMap.find(this->hDevice->ZeDevice); - if (It != hProgram->ZeModuleMap.end()) { - zeModule = It->second; - } else { - zeModule = hProgram->ZeModule; - } + // TODO: make getZeModuleHandle thread-safe + ze_module_handle_t zeModule = + hProgram->getZeModuleHandle(this->hDevice->ZeDevice); // Find global variable pointer auto globalVarPtr = getGlobalPointerFromModule(zeModule, offset, count, name); diff --git a/source/adapters/mock/ur_mockddi.cpp b/source/adapters/mock/ur_mockddi.cpp index 1e2b788683..dea28a4658 100644 --- a/source/adapters/mock/ur_mockddi.cpp +++ b/source/adapters/mock/ur_mockddi.cpp @@ -3157,10 +3157,16 @@ __urdlllocal ur_result_t UR_APICALL urProgramCreateWithIL( /// @brief Intercept function for urProgramCreateWithBinary __urdlllocal ur_result_t UR_APICALL urProgramCreateWithBinary( ur_context_handle_t hContext, ///< [in] handle of the context instance - ur_device_handle_t - hDevice, ///< [in] handle to device associated with binary. - size_t size, ///< [in] size in bytes. - const uint8_t *pBinary, ///< [in] pointer to binary. + uint32_t numDevices, ///< [in] number of devices + ur_device_handle_t * + phDevices, ///< [in][range(0, numDevices)] a pointer to a list of device handles. The + ///< binaries are loaded for devices specified in this list. + size_t * + pLengths, ///< [in][range(0, numDevices)] array of sizes of program binaries + ///< specified by `pBinaries` (in bytes). + const uint8_t ** + ppBinaries, ///< [in][range(0, numDevices)] pointer to program binaries to be loaded + ///< for devices specified by `phDevices`. const ur_program_properties_t * pProperties, ///< [in][optional] pointer to program creation properties. ur_program_handle_t @@ -3169,7 +3175,8 @@ __urdlllocal ur_result_t UR_APICALL urProgramCreateWithBinary( ur_result_t result = UR_RESULT_SUCCESS; ur_program_create_with_binary_params_t params = { - &hContext, &hDevice, &size, &pBinary, &pProperties, &phProgram}; + &hContext, &numDevices, &phDevices, &pLengths, + &ppBinaries, &pProperties, &phProgram}; auto beforeCallback = reinterpret_cast( mock::getCallbacks().get_before_callback("urProgramCreateWithBinary")); diff --git a/source/adapters/native_cpu/program.cpp b/source/adapters/native_cpu/program.cpp index 02ddda0b50..bc7baeb387 100644 --- a/source/adapters/native_cpu/program.cpp +++ b/source/adapters/native_cpu/program.cpp @@ -54,10 +54,16 @@ deserializeWGMetadata(const ur_program_metadata_t &MetadataElement, } UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithBinary( - ur_context_handle_t hContext, ur_device_handle_t hDevice, size_t size, - const uint8_t *pBinary, const ur_program_properties_t *pProperties, + ur_context_handle_t hContext, uint32_t numDevices, + ur_device_handle_t *phDevices, size_t *pLengths, const uint8_t **ppBinaries, + const ur_program_properties_t *pProperties, ur_program_handle_t *phProgram) { - std::ignore = size; + if (numDevices > 1) + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; + + auto hDevice = phDevices[0]; + auto pBinary = ppBinaries[0]; + std::ignore = pLengths; std::ignore = pProperties; UR_ASSERT(hContext, UR_RESULT_ERROR_INVALID_NULL_HANDLE); @@ -106,6 +112,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithBinary( return UR_RESULT_SUCCESS; } +UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithBinaryExp( + ur_context_handle_t, uint32_t, ur_device_handle_t *, size_t *, + const uint8_t **, const ur_program_properties_t *, ur_program_handle_t *) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + UR_APIEXPORT ur_result_t UR_APICALL urProgramBuild(ur_context_handle_t hContext, ur_program_handle_t hProgram, const char *pOptions) { diff --git a/source/adapters/opencl/program.cpp b/source/adapters/opencl/program.cpp index f154a54051..9c8f214410 100644 --- a/source/adapters/opencl/program.cpp +++ b/source/adapters/opencl/program.cpp @@ -116,18 +116,21 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithIL( } UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithBinary( - ur_context_handle_t hContext, ur_device_handle_t hDevice, size_t size, - const uint8_t *pBinary, const ur_program_properties_t *, - ur_program_handle_t *phProgram) { - - const cl_device_id Devices[1] = {cl_adapter::cast(hDevice)}; - const size_t Lengths[1] = {size}; - cl_int BinaryStatus[1]; + ur_context_handle_t hContext, uint32_t numDevices, + ur_device_handle_t *phDevices, size_t *pLengths, const uint8_t **ppBinaries, + const ur_program_properties_t *, ur_program_handle_t *phProgram) { + std::vector Devices(numDevices); + for (uint32_t i = 0; i < numDevices; ++i) + Devices[i] = cl_adapter::cast(phDevices[i]); + std::vector BinaryStatus(numDevices); cl_int CLResult; *phProgram = cl_adapter::cast(clCreateProgramWithBinary( - cl_adapter::cast(hContext), cl_adapter::cast(1u), - Devices, Lengths, &pBinary, BinaryStatus, &CLResult)); - CL_RETURN_ON_FAILURE(BinaryStatus[0]); + cl_adapter::cast(hContext), + cl_adapter::cast(numDevices), Devices.data(), pLengths, + ppBinaries, BinaryStatus.data(), &CLResult)); + for (uint32_t i = 0; i < numDevices; ++i) { + CL_RETURN_ON_FAILURE(BinaryStatus[i]); + } CL_RETURN_ON_FAILURE(CLResult); return UR_RESULT_SUCCESS; diff --git a/source/loader/layers/sanitizer/ur_sanddi.cpp b/source/loader/layers/sanitizer/ur_sanddi.cpp index 2f02c9270e..95b1649691 100644 --- a/source/loader/layers/sanitizer/ur_sanddi.cpp +++ b/source/loader/layers/sanitizer/ur_sanddi.cpp @@ -206,10 +206,16 @@ __urdlllocal ur_result_t UR_APICALL urProgramCreateWithIL( /// @brief Intercept function for urProgramCreateWithBinary __urdlllocal ur_result_t UR_APICALL urProgramCreateWithBinary( ur_context_handle_t hContext, ///< [in] handle of the context instance - ur_device_handle_t - hDevice, ///< [in] handle to device associated with binary. - size_t size, ///< [in] size in bytes. - const uint8_t *pBinary, ///< [in] pointer to binary. + uint32_t numDevices, ///< [in] number of devices + ur_device_handle_t * + phDevices, ///< [in][range(0, numDevices)] a pointer to a list of device handles. The + ///< binaries are loaded for devices specified in this list. + size_t * + pLengths, ///< [in][range(0, numDevices)] array of sizes of program binaries + ///< specified by `pBinaries` (in bytes). + const uint8_t ** + ppBinaries, ///< [in][range(0, numDevices)] pointer to program binaries to be loaded + ///< for devices specified by `phDevices`. const ur_program_properties_t * pProperties, ///< [in][optional] pointer to program creation properties. ur_program_handle_t @@ -224,8 +230,9 @@ __urdlllocal ur_result_t UR_APICALL urProgramCreateWithBinary( getContext()->logger.debug("==== urProgramCreateWithBinary"); - UR_CALL(pfnProgramCreateWithBinary(hContext, hDevice, size, pBinary, - pProperties, phProgram)); + UR_CALL(pfnProgramCreateWithBinary(hContext, numDevices, phDevices, + pLengths, ppBinaries, pProperties, + phProgram)); UR_CALL(getContext()->interceptor->insertProgram(*phProgram)); return UR_RESULT_SUCCESS; diff --git a/source/loader/layers/tracing/ur_trcddi.cpp b/source/loader/layers/tracing/ur_trcddi.cpp index d6f23eab9a..9cc18c66c4 100644 --- a/source/loader/layers/tracing/ur_trcddi.cpp +++ b/source/loader/layers/tracing/ur_trcddi.cpp @@ -2650,10 +2650,16 @@ __urdlllocal ur_result_t UR_APICALL urProgramCreateWithIL( /// @brief Intercept function for urProgramCreateWithBinary __urdlllocal ur_result_t UR_APICALL urProgramCreateWithBinary( ur_context_handle_t hContext, ///< [in] handle of the context instance - ur_device_handle_t - hDevice, ///< [in] handle to device associated with binary. - size_t size, ///< [in] size in bytes. - const uint8_t *pBinary, ///< [in] pointer to binary. + uint32_t numDevices, ///< [in] number of devices + ur_device_handle_t * + phDevices, ///< [in][range(0, numDevices)] a pointer to a list of device handles. The + ///< binaries are loaded for devices specified in this list. + size_t * + pLengths, ///< [in][range(0, numDevices)] array of sizes of program binaries + ///< specified by `pBinaries` (in bytes). + const uint8_t ** + ppBinaries, ///< [in][range(0, numDevices)] pointer to program binaries to be loaded + ///< for devices specified by `phDevices`. const ur_program_properties_t * pProperties, ///< [in][optional] pointer to program creation properties. ur_program_handle_t @@ -2667,7 +2673,8 @@ __urdlllocal ur_result_t UR_APICALL urProgramCreateWithBinary( } ur_program_create_with_binary_params_t params = { - &hContext, &hDevice, &size, &pBinary, &pProperties, &phProgram}; + &hContext, &numDevices, &phDevices, &pLengths, + &ppBinaries, &pProperties, &phProgram}; uint64_t instance = getContext()->notify_begin(UR_FUNCTION_PROGRAM_CREATE_WITH_BINARY, "urProgramCreateWithBinary", ¶ms); @@ -2675,8 +2682,9 @@ __urdlllocal ur_result_t UR_APICALL urProgramCreateWithBinary( auto &logger = getContext()->logger; logger.info(" ---> urProgramCreateWithBinary\n"); - ur_result_t result = pfnCreateWithBinary(hContext, hDevice, size, pBinary, - pProperties, phProgram); + ur_result_t result = + pfnCreateWithBinary(hContext, numDevices, phDevices, pLengths, + ppBinaries, pProperties, phProgram); getContext()->notify_end(UR_FUNCTION_PROGRAM_CREATE_WITH_BINARY, "urProgramCreateWithBinary", ¶ms, &result, diff --git a/source/loader/layers/validation/ur_valddi.cpp b/source/loader/layers/validation/ur_valddi.cpp index ddf40de35f..748f40638e 100644 --- a/source/loader/layers/validation/ur_valddi.cpp +++ b/source/loader/layers/validation/ur_valddi.cpp @@ -2721,10 +2721,16 @@ __urdlllocal ur_result_t UR_APICALL urProgramCreateWithIL( /// @brief Intercept function for urProgramCreateWithBinary __urdlllocal ur_result_t UR_APICALL urProgramCreateWithBinary( ur_context_handle_t hContext, ///< [in] handle of the context instance - ur_device_handle_t - hDevice, ///< [in] handle to device associated with binary. - size_t size, ///< [in] size in bytes. - const uint8_t *pBinary, ///< [in] pointer to binary. + uint32_t numDevices, ///< [in] number of devices + ur_device_handle_t * + phDevices, ///< [in][range(0, numDevices)] a pointer to a list of device handles. The + ///< binaries are loaded for devices specified in this list. + size_t * + pLengths, ///< [in][range(0, numDevices)] array of sizes of program binaries + ///< specified by `pBinaries` (in bytes). + const uint8_t ** + ppBinaries, ///< [in][range(0, numDevices)] pointer to program binaries to be loaded + ///< for devices specified by `phDevices`. const ur_program_properties_t * pProperties, ///< [in][optional] pointer to program creation properties. ur_program_handle_t @@ -2742,11 +2748,15 @@ __urdlllocal ur_result_t UR_APICALL urProgramCreateWithBinary( return UR_RESULT_ERROR_INVALID_NULL_HANDLE; } - if (NULL == hDevice) { - return UR_RESULT_ERROR_INVALID_NULL_HANDLE; + if (NULL == phDevices) { + return UR_RESULT_ERROR_INVALID_NULL_POINTER; + } + + if (NULL == pLengths) { + return UR_RESULT_ERROR_INVALID_NULL_POINTER; } - if (NULL == pBinary) { + if (NULL == ppBinaries) { return UR_RESULT_ERROR_INVALID_NULL_POINTER; } @@ -2763,6 +2773,10 @@ __urdlllocal ur_result_t UR_APICALL urProgramCreateWithBinary( pProperties->count == 0) { return UR_RESULT_ERROR_INVALID_SIZE; } + + if (numDevices == 0) { + return UR_RESULT_ERROR_INVALID_SIZE; + } } if (getContext()->enableLifetimeValidation && @@ -2770,13 +2784,9 @@ __urdlllocal ur_result_t UR_APICALL urProgramCreateWithBinary( getContext()->refCountContext->logInvalidReference(hContext); } - if (getContext()->enableLifetimeValidation && - !getContext()->refCountContext->isReferenceValid(hDevice)) { - getContext()->refCountContext->logInvalidReference(hDevice); - } - - ur_result_t result = pfnCreateWithBinary(hContext, hDevice, size, pBinary, - pProperties, phProgram); + ur_result_t result = + pfnCreateWithBinary(hContext, numDevices, phDevices, pLengths, + ppBinaries, pProperties, phProgram); if (getContext()->enableLeakChecking && result == UR_RESULT_SUCCESS) { getContext()->refCountContext->createRefCount(*phProgram); diff --git a/source/loader/ur_ldrddi.cpp b/source/loader/ur_ldrddi.cpp index e86bada0a0..a67879a9eb 100644 --- a/source/loader/ur_ldrddi.cpp +++ b/source/loader/ur_ldrddi.cpp @@ -2563,10 +2563,16 @@ __urdlllocal ur_result_t UR_APICALL urProgramCreateWithIL( /// @brief Intercept function for urProgramCreateWithBinary __urdlllocal ur_result_t UR_APICALL urProgramCreateWithBinary( ur_context_handle_t hContext, ///< [in] handle of the context instance - ur_device_handle_t - hDevice, ///< [in] handle to device associated with binary. - size_t size, ///< [in] size in bytes. - const uint8_t *pBinary, ///< [in] pointer to binary. + uint32_t numDevices, ///< [in] number of devices + ur_device_handle_t * + phDevices, ///< [in][range(0, numDevices)] a pointer to a list of device handles. The + ///< binaries are loaded for devices specified in this list. + size_t * + pLengths, ///< [in][range(0, numDevices)] array of sizes of program binaries + ///< specified by `pBinaries` (in bytes). + const uint8_t ** + ppBinaries, ///< [in][range(0, numDevices)] pointer to program binaries to be loaded + ///< for devices specified by `phDevices`. const ur_program_properties_t * pProperties, ///< [in][optional] pointer to program creation properties. ur_program_handle_t @@ -2586,12 +2592,16 @@ __urdlllocal ur_result_t UR_APICALL urProgramCreateWithBinary( // convert loader handle to platform handle hContext = reinterpret_cast(hContext)->handle; - // convert loader handle to platform handle - hDevice = reinterpret_cast(hDevice)->handle; + // convert loader handles to platform handles + auto phDevicesLocal = std::vector(numDevices); + for (size_t i = 0; i < numDevices; ++i) { + phDevicesLocal[i] = + reinterpret_cast(phDevices[i])->handle; + } // forward to device-platform - result = pfnCreateWithBinary(hContext, hDevice, size, pBinary, pProperties, - phProgram); + result = pfnCreateWithBinary(hContext, numDevices, phDevicesLocal.data(), + pLengths, ppBinaries, pProperties, phProgram); if (UR_RESULT_SUCCESS != result) { return result; diff --git a/source/loader/ur_libapi.cpp b/source/loader/ur_libapi.cpp index a77c5916b1..f1044ea3af 100644 --- a/source/loader/ur_libapi.cpp +++ b/source/loader/ur_libapi.cpp @@ -2996,17 +2996,19 @@ ur_result_t UR_APICALL urProgramCreateWithIL( } /////////////////////////////////////////////////////////////////////////////// -/// @brief Create a program object from device native binary. +/// @brief Create a program object from native binaries for the specified +/// devices. /// /// @details /// - The application may call this function from simultaneous threads. /// - Following a successful call to this entry point, `phProgram` will -/// contain a binary of type ::UR_PROGRAM_BINARY_TYPE_COMPILED_OBJECT or -/// ::UR_PROGRAM_BINARY_TYPE_LIBRARY for `hDevice`. -/// - The device specified by `hDevice` must be device associated with +/// contain binaries of type ::UR_PROGRAM_BINARY_TYPE_COMPILED_OBJECT or +/// ::UR_PROGRAM_BINARY_TYPE_LIBRARY for the specified devices in +/// `phDevices`. +/// - The devices specified by `phDevices` must be associated with the /// context. /// - The adapter may (but is not required to) perform validation of the -/// provided module during this call. +/// provided modules during this call. /// /// @remarks /// _Analogues_ @@ -3019,21 +3021,29 @@ ur_result_t UR_APICALL urProgramCreateWithIL( /// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC /// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE /// + `NULL == hContext` -/// + `NULL == hDevice` /// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER -/// + `NULL == pBinary` +/// + `NULL == phDevices` +/// + `NULL == pLengths` +/// + `NULL == ppBinaries` /// + `NULL == phProgram` /// + `NULL != pProperties && pProperties->count > 0 && NULL == pProperties->pMetadatas` /// - ::UR_RESULT_ERROR_INVALID_SIZE /// + `NULL != pProperties && NULL != pProperties->pMetadatas && pProperties->count == 0` +/// + `numDevices == 0` /// - ::UR_RESULT_ERROR_INVALID_NATIVE_BINARY -/// + If `pBinary` isn't a valid binary for `hDevice.` +/// + If any binary in `ppBinaries` isn't a valid binary for the corresponding device in `phDevices.` ur_result_t UR_APICALL urProgramCreateWithBinary( ur_context_handle_t hContext, ///< [in] handle of the context instance - ur_device_handle_t - hDevice, ///< [in] handle to device associated with binary. - size_t size, ///< [in] size in bytes. - const uint8_t *pBinary, ///< [in] pointer to binary. + uint32_t numDevices, ///< [in] number of devices + ur_device_handle_t * + phDevices, ///< [in][range(0, numDevices)] a pointer to a list of device handles. The + ///< binaries are loaded for devices specified in this list. + size_t * + pLengths, ///< [in][range(0, numDevices)] array of sizes of program binaries + ///< specified by `pBinaries` (in bytes). + const uint8_t ** + ppBinaries, ///< [in][range(0, numDevices)] pointer to program binaries to be loaded + ///< for devices specified by `phDevices`. const ur_program_properties_t * pProperties, ///< [in][optional] pointer to program creation properties. ur_program_handle_t @@ -3045,8 +3055,8 @@ ur_result_t UR_APICALL urProgramCreateWithBinary( return UR_RESULT_ERROR_UNINITIALIZED; } - return pfnCreateWithBinary(hContext, hDevice, size, pBinary, pProperties, - phProgram); + return pfnCreateWithBinary(hContext, numDevices, phDevices, pLengths, + ppBinaries, pProperties, phProgram); } catch (...) { return exceptionToResult(std::current_exception()); } diff --git a/source/ur_api.cpp b/source/ur_api.cpp index e375d496f8..0b3d7f20bc 100644 --- a/source/ur_api.cpp +++ b/source/ur_api.cpp @@ -2563,17 +2563,19 @@ ur_result_t UR_APICALL urProgramCreateWithIL( } /////////////////////////////////////////////////////////////////////////////// -/// @brief Create a program object from device native binary. +/// @brief Create a program object from native binaries for the specified +/// devices. /// /// @details /// - The application may call this function from simultaneous threads. /// - Following a successful call to this entry point, `phProgram` will -/// contain a binary of type ::UR_PROGRAM_BINARY_TYPE_COMPILED_OBJECT or -/// ::UR_PROGRAM_BINARY_TYPE_LIBRARY for `hDevice`. -/// - The device specified by `hDevice` must be device associated with +/// contain binaries of type ::UR_PROGRAM_BINARY_TYPE_COMPILED_OBJECT or +/// ::UR_PROGRAM_BINARY_TYPE_LIBRARY for the specified devices in +/// `phDevices`. +/// - The devices specified by `phDevices` must be associated with the /// context. /// - The adapter may (but is not required to) perform validation of the -/// provided module during this call. +/// provided modules during this call. /// /// @remarks /// _Analogues_ @@ -2586,21 +2588,29 @@ ur_result_t UR_APICALL urProgramCreateWithIL( /// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC /// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE /// + `NULL == hContext` -/// + `NULL == hDevice` /// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER -/// + `NULL == pBinary` +/// + `NULL == phDevices` +/// + `NULL == pLengths` +/// + `NULL == ppBinaries` /// + `NULL == phProgram` /// + `NULL != pProperties && pProperties->count > 0 && NULL == pProperties->pMetadatas` /// - ::UR_RESULT_ERROR_INVALID_SIZE /// + `NULL != pProperties && NULL != pProperties->pMetadatas && pProperties->count == 0` +/// + `numDevices == 0` /// - ::UR_RESULT_ERROR_INVALID_NATIVE_BINARY -/// + If `pBinary` isn't a valid binary for `hDevice.` +/// + If any binary in `ppBinaries` isn't a valid binary for the corresponding device in `phDevices.` ur_result_t UR_APICALL urProgramCreateWithBinary( ur_context_handle_t hContext, ///< [in] handle of the context instance - ur_device_handle_t - hDevice, ///< [in] handle to device associated with binary. - size_t size, ///< [in] size in bytes. - const uint8_t *pBinary, ///< [in] pointer to binary. + uint32_t numDevices, ///< [in] number of devices + ur_device_handle_t * + phDevices, ///< [in][range(0, numDevices)] a pointer to a list of device handles. The + ///< binaries are loaded for devices specified in this list. + size_t * + pLengths, ///< [in][range(0, numDevices)] array of sizes of program binaries + ///< specified by `pBinaries` (in bytes). + const uint8_t ** + ppBinaries, ///< [in][range(0, numDevices)] pointer to program binaries to be loaded + ///< for devices specified by `phDevices`. const ur_program_properties_t * pProperties, ///< [in][optional] pointer to program creation properties. ur_program_handle_t diff --git a/test/adapters/cuda/kernel_tests.cpp b/test/adapters/cuda/kernel_tests.cpp index 80ec9146fd..085c03030b 100644 --- a/test/adapters/cuda/kernel_tests.cpp +++ b/test/adapters/cuda/kernel_tests.cpp @@ -74,9 +74,10 @@ const char *threeParamsTwoLocal = "\n\ TEST_P(cudaKernelTest, CreateProgramAndKernel) { uur::raii::Program program = nullptr; - ASSERT_SUCCESS(urProgramCreateWithBinary( - context, device, std::strlen(ptxSource), (const uint8_t *)ptxSource, - nullptr, program.ptr())); + auto Length = std::strlen(ptxSource); + ASSERT_SUCCESS(urProgramCreateWithBinary(context, 1, &device, &Length, + (const uint8_t **)(&ptxSource), + nullptr, program.ptr())); ASSERT_NE(program, nullptr); ASSERT_SUCCESS(urProgramBuild(context, program, nullptr)); @@ -116,9 +117,10 @@ TEST_P(cudaKernelTest, CreateProgramAndKernelWithMetadata) { ur_program_properties_t programProps{UR_STRUCTURE_TYPE_PROGRAM_PROPERTIES, nullptr, 1, &reqdWorkGroupSizeMDProp}; uur::raii::Program program = nullptr; - ASSERT_SUCCESS(urProgramCreateWithBinary( - context, device, std::strlen(ptxSource), (const uint8_t *)ptxSource, - &programProps, program.ptr())); + auto Length = std::strlen(ptxSource); + ASSERT_SUCCESS(urProgramCreateWithBinary(context, 1, &device, &Length, + (const uint8_t **)(&ptxSource), + &programProps, program.ptr())); ASSERT_NE(program, nullptr); ASSERT_SUCCESS(urProgramBuild(context, program, nullptr)); @@ -138,9 +140,10 @@ TEST_P(cudaKernelTest, CreateProgramAndKernelWithMetadata) { TEST_P(cudaKernelTest, URKernelArgumentSimple) { uur::raii::Program program = nullptr; - ASSERT_SUCCESS(urProgramCreateWithBinary( - context, device, std::strlen(ptxSource), (const uint8_t *)ptxSource, - nullptr, program.ptr())); + auto Length = std::strlen(ptxSource); + ASSERT_SUCCESS(urProgramCreateWithBinary(context, 1, &device, &Length, + (const uint8_t **)(&ptxSource), + nullptr, program.ptr())); ASSERT_NE(program, nullptr); ASSERT_SUCCESS(urProgramBuild(context, program, nullptr)); @@ -160,9 +163,10 @@ TEST_P(cudaKernelTest, URKernelArgumentSimple) { TEST_P(cudaKernelTest, URKernelArgumentSetTwice) { uur::raii::Program program = nullptr; - ASSERT_SUCCESS(urProgramCreateWithBinary( - context, device, std::strlen(ptxSource), (const uint8_t *)ptxSource, - nullptr, program.ptr())); + auto Length = std::strlen(ptxSource); + ASSERT_SUCCESS(urProgramCreateWithBinary(context, 1, &device, &Length, + (const uint8_t **)(&ptxSource), + nullptr, program.ptr())); ASSERT_NE(program, nullptr); ASSERT_SUCCESS(urProgramBuild(context, program, nullptr)); @@ -189,9 +193,10 @@ TEST_P(cudaKernelTest, URKernelArgumentSetTwice) { TEST_P(cudaKernelTest, URKernelDispatch) { uur::raii::Program program = nullptr; - ASSERT_SUCCESS(urProgramCreateWithBinary( - context, device, std::strlen(ptxSource), (const uint8_t *)ptxSource, - nullptr, program.ptr())); + auto Length = std::strlen(ptxSource); + ASSERT_SUCCESS(urProgramCreateWithBinary(context, 1, &device, &Length, + (const uint8_t **)(&ptxSource), + nullptr, program.ptr())); ASSERT_NE(program, nullptr); ASSERT_SUCCESS(urProgramBuild(context, program, nullptr)); @@ -218,9 +223,10 @@ TEST_P(cudaKernelTest, URKernelDispatch) { TEST_P(cudaKernelTest, URKernelDispatchTwo) { uur::raii::Program program = nullptr; - ASSERT_SUCCESS(urProgramCreateWithBinary( - context, device, std::strlen(ptxSource), (const uint8_t *)twoParams, - nullptr, program.ptr())); + auto Length = std::strlen(ptxSource); + ASSERT_SUCCESS(urProgramCreateWithBinary(context, 1, &device, &Length, + (const uint8_t **)(&twoParams), + nullptr, program.ptr())); ASSERT_NE(program, nullptr); ASSERT_SUCCESS(urProgramBuild(context, program, nullptr)); diff --git a/test/conformance/program/CMakeLists.txt b/test/conformance/program/CMakeLists.txt index 317e3df946..31235eaf71 100644 --- a/test/conformance/program/CMakeLists.txt +++ b/test/conformance/program/CMakeLists.txt @@ -7,6 +7,7 @@ add_conformance_test_with_kernels_environment(program urProgramBuild.cpp urProgramCompile.cpp urProgramCreateWithBinary.cpp + urMultiDeviceProgramCreateWithBinary.cpp urProgramCreateWithIL.cpp urProgramCreateWithNativeHandle.cpp urProgramGetBuildInfo.cpp diff --git a/test/conformance/program/program_adapter_level_zero.match b/test/conformance/program/program_adapter_level_zero.match index 445f7e6fbd..bd7e269d9f 100644 --- a/test/conformance/program/program_adapter_level_zero.match +++ b/test/conformance/program/program_adapter_level_zero.match @@ -1,12 +1,4 @@ {{NONDETERMINISTIC}} -urProgramCreateWithNativeHandleTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urProgramCreateWithNativeHandleTest.InvalidNullHandleContext/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urProgramCreateWithNativeHandleTest.InvalidNullPointerProgram/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urProgramGetBuildInfoTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_UR_PROGRAM_BUILD_INFO_STATUS -urProgramGetFunctionPointerTest.InvalidKernelName/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urProgramGetNativeHandleTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -{{OPT}}urProgramLinkErrorTest.LinkFailure/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -{{OPT}}urProgramLinkErrorTest.SetOutputOnLinkError/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ urProgramSetSpecializationConstantsTest.InvalidValueSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}} urProgramSetSpecializationConstantsTest.InvalidValueId/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}} urProgramSetSpecializationConstantsTest.InvalidValuePtr/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}} diff --git a/test/conformance/program/program_adapter_level_zero_v2.match b/test/conformance/program/program_adapter_level_zero_v2.match index 2c5b6500c3..892b7cfb51 100644 --- a/test/conformance/program/program_adapter_level_zero_v2.match +++ b/test/conformance/program/program_adapter_level_zero_v2.match @@ -1,12 +1,4 @@ {{NONDETERMINISTIC}} -urProgramCreateWithNativeHandleTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urProgramCreateWithNativeHandleTest.InvalidNullHandleContext/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urProgramCreateWithNativeHandleTest.InvalidNullPointerProgram/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urProgramGetBuildInfoTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_PROGRAM_BUILD_INFO_STATUS -urProgramGetFunctionPointerTest.InvalidKernelName/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urProgramGetNativeHandleTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -{{OPT}}urProgramLinkErrorTest.LinkFailure/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -{{OPT}}urProgramLinkErrorTest.SetOutputOnLinkError/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ urProgramSetSpecializationConstantsTest.InvalidValueSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}} urProgramSetSpecializationConstantsTest.InvalidValueId/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ urProgramSetSpecializationConstantsTest.InvalidValuePtr/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ diff --git a/test/conformance/program/urMultiDeviceProgramCreateWithBinary.cpp b/test/conformance/program/urMultiDeviceProgramCreateWithBinary.cpp new file mode 100644 index 0000000000..95a135af1c --- /dev/null +++ b/test/conformance/program/urMultiDeviceProgramCreateWithBinary.cpp @@ -0,0 +1,247 @@ + +// Copyright (C) 2024 Intel Corporation +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. +// See LICENSE.TXT +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#include +#include + +struct urMultiDeviceProgramCreateWithBinaryTest + : uur::urMultiDeviceProgramTest { + void SetUp() override { + UUR_RETURN_ON_FATAL_FAILURE(urMultiDeviceProgramTest::SetUp()); + + // First obtain binaries for all devices from the compiler SPIRV program. + devices = uur::DevicesEnvironment::instance->devices; + if (devices.size() < 2) { + GTEST_SKIP(); + } + ASSERT_SUCCESS(urProgramBuild(context, program, nullptr)); + size_t binary_sizes_len = 0; + ASSERT_SUCCESS(urProgramGetInfo(program, UR_PROGRAM_INFO_BINARY_SIZES, + 0, nullptr, &binary_sizes_len)); + // We're expecting number of binaries equal to number of devices. + ASSERT_EQ(binary_sizes_len / sizeof(size_t), devices.size()); + binary_sizes.resize(devices.size()); + binaries.resize(devices.size()); + ASSERT_SUCCESS(urProgramGetInfo(program, UR_PROGRAM_INFO_BINARY_SIZES, + binary_sizes.size() * sizeof(size_t), + binary_sizes.data(), nullptr)); + for (size_t i = 0; i < devices.size(); i++) { + size_t binary_size = binary_sizes[i]; + binaries[i].resize(binary_size); + pointers.push_back(binaries[i].data()); + } + ASSERT_SUCCESS(urProgramGetInfo(program, UR_PROGRAM_INFO_BINARIES, + sizeof(uint8_t *) * pointers.size(), + pointers.data(), nullptr)); + + // Now create a program with multiple device binaries. + ASSERT_SUCCESS(urProgramCreateWithBinary( + context, devices.size(), devices.data(), binary_sizes.data(), + pointers.data(), nullptr, &binary_program)); + } + + void TearDown() override { + if (binary_program) { + EXPECT_SUCCESS(urProgramRelease(binary_program)); + } + UUR_RETURN_ON_FATAL_FAILURE(urMultiDeviceProgramTest::TearDown()); + } + + std::vector> binaries; + std::vector devices; + std::vector pointers; + std::vector binary_sizes; + ur_program_handle_t binary_program = nullptr; +}; + +// Create the kernel using the program created with multiple binaries and run it on all devices. +TEST_F(urMultiDeviceProgramCreateWithBinaryTest, + CreateAndRunKernelOnAllDevices) { + constexpr size_t global_offset = 0; + constexpr size_t n_dimensions = 1; + constexpr size_t global_size = 100; + constexpr size_t local_size = 100; + + auto kernelName = + uur::KernelsEnvironment::instance->GetEntryPointNames("foo")[0]; + + for (size_t i = 1; i < devices.size(); i++) { + uur::raii::Kernel kernel; + ASSERT_SUCCESS(urProgramBuild(context, binary_program, nullptr)); + ASSERT_SUCCESS( + urKernelCreate(binary_program, kernelName.data(), kernel.ptr())); + + ASSERT_SUCCESS(urEnqueueKernelLaunch( + queues[i], kernel.get(), n_dimensions, &global_offset, &local_size, + &global_size, 0, nullptr, nullptr)); + + ASSERT_SUCCESS(urQueueFinish(queues[i])); + } +} + +TEST_F(urMultiDeviceProgramCreateWithBinaryTest, CheckCompileAndLink) { + // TODO: Current behaviour is that we allow to compile only IL programs for Level Zero and link only programs in Object state. + // OpenCL allows to compile and link programs created from native binaries, so probably we should align those two. + ur_platform_backend_t backend; + ASSERT_SUCCESS(urPlatformGetInfo(platform, UR_PLATFORM_INFO_BACKEND, + sizeof(backend), &backend, nullptr)); + if (backend == UR_PLATFORM_BACKEND_LEVEL_ZERO) { + ASSERT_EQ(urProgramCompile(context, binary_program, nullptr), + UR_RESULT_ERROR_INVALID_OPERATION); + uur::raii::Program linked_program; + ASSERT_EQ(urProgramLink(context, 1, &binary_program, nullptr, + linked_program.ptr()), + UR_RESULT_ERROR_INVALID_OPERATION); + } else if (backend == UR_PLATFORM_BACKEND_OPENCL) { + ASSERT_SUCCESS(urProgramCompile(context, binary_program, nullptr)); + uur::raii::Program linked_program; + ASSERT_SUCCESS(urProgramLink(context, 1, &binary_program, nullptr, + linked_program.ptr())); + } else { + GTEST_SKIP(); + } +} + +TEST_F(urMultiDeviceProgramCreateWithBinaryTest, + InvalidProgramBinaryForOneOfTheDevices) { + std::vector pointers_with_invalid_binary; + for (size_t i = 1; i < devices.size(); i++) { + pointers_with_invalid_binary.push_back(nullptr); + } + uur::raii::Program invalid_bin_program; + ASSERT_EQ(urProgramCreateWithBinary(context, devices.size(), devices.data(), + binary_sizes.data(), + pointers_with_invalid_binary.data(), + nullptr, invalid_bin_program.ptr()), + UR_RESULT_ERROR_INVALID_VALUE); +} + +// Test the case when program is built multiple times for different devices from context. +TEST_F(urMultiDeviceProgramCreateWithBinaryTest, MultipleBuildCalls) { + // Run test only for level zero backend which supports urProgramBuildExp. + ur_platform_backend_t backend; + ASSERT_SUCCESS(urPlatformGetInfo(platform, UR_PLATFORM_INFO_BACKEND, + sizeof(backend), &backend, nullptr)); + if (backend != UR_PLATFORM_BACKEND_LEVEL_ZERO) { + GTEST_SKIP(); + } + auto first_subset = std::vector( + devices.begin(), devices.begin() + devices.size() / 2); + auto second_subset = std::vector( + devices.begin() + devices.size() / 2, devices.end()); + ASSERT_SUCCESS(urProgramBuildExp(binary_program, first_subset.size(), + first_subset.data(), nullptr)); + auto kernelName = + uur::KernelsEnvironment::instance->GetEntryPointNames("foo")[0]; + uur::raii::Kernel kernel; + ASSERT_SUCCESS( + urKernelCreate(binary_program, kernelName.data(), kernel.ptr())); + ASSERT_SUCCESS(urProgramBuildExp(binary_program, second_subset.size(), + second_subset.data(), nullptr)); + ASSERT_SUCCESS( + urKernelCreate(binary_program, kernelName.data(), kernel.ptr())); + + // Building for the same subset of devices should not fail. + ASSERT_SUCCESS(urProgramBuildExp(binary_program, first_subset.size(), + first_subset.data(), nullptr)); +} + +// Test the case we get native binaries from program created with multiple binaries which wasn't built (i.e. in Native state). +TEST_F(urMultiDeviceProgramCreateWithBinaryTest, + GetBinariesAndSizesFromProgramInNativeState) { + size_t exp_binary_sizes_len = 0; + std::vector exp_binary_sizes; + std::vector> exp_binaries; + std::vector exp_pointer; + ASSERT_SUCCESS(urProgramGetInfo(binary_program, + UR_PROGRAM_INFO_BINARY_SIZES, 0, nullptr, + &exp_binary_sizes_len)); + auto num = exp_binary_sizes_len / sizeof(size_t); + exp_binary_sizes.resize(num); + exp_binaries.resize(num); + exp_pointer.resize(num); + ASSERT_SUCCESS(urProgramGetInfo(binary_program, + UR_PROGRAM_INFO_BINARY_SIZES, + exp_binary_sizes.size() * sizeof(size_t), + exp_binary_sizes.data(), nullptr)); + for (size_t i = 0; i < devices.size(); i++) { + size_t binary_size = exp_binary_sizes[i]; + exp_binaries[i].resize(binary_size); + exp_pointer[i] = exp_binaries[i].data(); + } + ASSERT_SUCCESS(urProgramGetInfo(program, UR_PROGRAM_INFO_BINARIES, + sizeof(uint8_t *) * exp_pointer.size(), + exp_pointer.data(), nullptr)); + + // Verify that we get exactly what was provided at the creation step. + ASSERT_EQ(exp_binaries, binaries); + ASSERT_EQ(exp_binary_sizes, binary_sizes); +} + +TEST_F(urMultiDeviceProgramCreateWithBinaryTest, GetIL) { + size_t il_length = 0; + ASSERT_SUCCESS(urProgramGetInfo(binary_program, UR_PROGRAM_INFO_IL, 0, + nullptr, &il_length)); + ASSERT_EQ(il_length, 0); + std::vector il(il_length); + ASSERT_EQ(urProgramGetInfo(binary_program, UR_PROGRAM_INFO_IL, il.size(), + il.data(), nullptr), + UR_RESULT_ERROR_INVALID_NULL_POINTER); +} + +TEST_F(urMultiDeviceProgramCreateWithBinaryTest, CheckProgramGetInfo) { + std::vector property_value; + size_t property_size = 0; + + // Program is not in exe state, so error is expected. + for (auto prop : + {UR_PROGRAM_INFO_NUM_KERNELS, UR_PROGRAM_INFO_KERNEL_NAMES}) { + auto result = + urProgramGetInfo(binary_program, prop, 0, nullptr, &property_size); + // TODO: OpenCL and Level Zero return diffent error code, it needs to be fixed. + ASSERT_TRUE(result == UR_RESULT_ERROR_INVALID_PROGRAM_EXECUTABLE || + result == UR_RESULT_ERROR_INVALID_PROGRAM); + } + + // Now build the program and check that we can get the info. + ASSERT_SUCCESS(urProgramBuild(context, binary_program, nullptr)); + + size_t logSize; + std::string log; + + for (auto dev : devices) { + ASSERT_SUCCESS(urProgramGetBuildInfo( + program, dev, UR_PROGRAM_BUILD_INFO_LOG, 0, nullptr, &logSize)); + // The size should always include the null terminator. + ASSERT_GT(logSize, 0); + log.resize(logSize); + ASSERT_SUCCESS(urProgramGetBuildInfo(program, dev, + UR_PROGRAM_BUILD_INFO_LOG, logSize, + log.data(), nullptr)); + ASSERT_EQ(log[logSize - 1], '\0'); + } + + ASSERT_SUCCESS(urProgramGetInfo(binary_program, UR_PROGRAM_INFO_NUM_KERNELS, + 0, nullptr, &property_size)); + property_value.resize(property_size); + ASSERT_SUCCESS(urProgramGetInfo(binary_program, UR_PROGRAM_INFO_NUM_KERNELS, + property_size, property_value.data(), + nullptr)); + + auto returned_num_of_kernels = + reinterpret_cast(property_value.data()); + ASSERT_GT(*returned_num_of_kernels, 0U); + ASSERT_SUCCESS(urProgramGetInfo(binary_program, + UR_PROGRAM_INFO_KERNEL_NAMES, 0, nullptr, + &property_size)); + property_value.resize(property_size); + ASSERT_SUCCESS(urProgramGetInfo(binary_program, + UR_PROGRAM_INFO_KERNEL_NAMES, property_size, + property_value.data(), nullptr)); + auto returned_kernel_names = + reinterpret_cast(property_value.data()); + ASSERT_STRNE(returned_kernel_names, ""); +} diff --git a/test/conformance/program/urProgramCreateWithBinary.cpp b/test/conformance/program/urProgramCreateWithBinary.cpp index 0f525dd293..c34c5a3223 100644 --- a/test/conformance/program/urProgramCreateWithBinary.cpp +++ b/test/conformance/program/urProgramCreateWithBinary.cpp @@ -38,45 +38,54 @@ struct urProgramCreateWithBinaryTest : uur::urProgramTest { UUR_INSTANTIATE_KERNEL_TEST_SUITE_P(urProgramCreateWithBinaryTest); TEST_P(urProgramCreateWithBinaryTest, Success) { - ASSERT_SUCCESS(urProgramCreateWithBinary(context, device, binary.size(), - binary.data(), nullptr, - &binary_program)); + auto size = binary.size(); + const uint8_t *data = binary.data(); + ASSERT_SUCCESS(urProgramCreateWithBinary(context, 1, &device, &size, &data, + nullptr, &binary_program)); } TEST_P(urProgramCreateWithBinaryTest, InvalidNullHandleContext) { + auto size = binary.size(); + const uint8_t *data = binary.data(); ASSERT_EQ_RESULT(UR_RESULT_ERROR_INVALID_NULL_HANDLE, - urProgramCreateWithBinary(nullptr, device, binary.size(), - binary.data(), nullptr, + urProgramCreateWithBinary(nullptr, 1, &device, &size, + &data, nullptr, &binary_program)); } TEST_P(urProgramCreateWithBinaryTest, InvalidNullHandleDevice) { - ASSERT_EQ_RESULT(UR_RESULT_ERROR_INVALID_NULL_HANDLE, - urProgramCreateWithBinary(context, nullptr, binary.size(), - binary.data(), nullptr, + auto size = binary.size(); + const uint8_t *data = binary.data(); + ASSERT_EQ_RESULT(UR_RESULT_ERROR_INVALID_NULL_POINTER, + urProgramCreateWithBinary(context, 0, nullptr, &size, + &data, nullptr, &binary_program)); } TEST_P(urProgramCreateWithBinaryTest, InvalidNullPointerBinary) { + auto size = binary.size(); ASSERT_EQ_RESULT(UR_RESULT_ERROR_INVALID_NULL_POINTER, - urProgramCreateWithBinary(context, device, binary.size(), + urProgramCreateWithBinary(context, 1, &device, &size, nullptr, nullptr, &binary_program)); } TEST_P(urProgramCreateWithBinaryTest, InvalidNullPointerProgram) { + auto size = binary.size(); + const uint8_t *data = binary.data(); ASSERT_EQ_RESULT(UR_RESULT_ERROR_INVALID_NULL_POINTER, - urProgramCreateWithBinary(context, device, binary.size(), - binary.data(), nullptr, - nullptr)); + urProgramCreateWithBinary(context, 1, &device, &size, + &data, nullptr, nullptr)); } TEST_P(urProgramCreateWithBinaryTest, InvalidNullPointerMetadata) { ur_program_properties_t properties = {}; properties.count = 1; + auto size = binary.size(); + const uint8_t *data = binary.data(); ASSERT_EQ_RESULT(UR_RESULT_ERROR_INVALID_NULL_POINTER, - urProgramCreateWithBinary(context, device, binary.size(), - binary.data(), &properties, + urProgramCreateWithBinary(context, 1, &device, &size, + &data, &properties, &binary_program)); } @@ -89,17 +98,21 @@ TEST_P(urProgramCreateWithBinaryTest, InvalidSizePropertyCount) { md_string.size(), md_value}; ur_program_properties_t properties = {}; properties.pMetadatas = &md; + auto size = binary.size(); + const uint8_t *data = binary.data(); ASSERT_EQ_RESULT(UR_RESULT_ERROR_INVALID_SIZE, - urProgramCreateWithBinary(context, device, binary.size(), - binary.data(), &properties, + urProgramCreateWithBinary(context, 1, &device, &size, + &data, &properties, &binary_program)); } TEST_P(urProgramCreateWithBinaryTest, BuildInvalidProgramBinary) { ur_program_handle_t program = nullptr; uint8_t binary[] = {0, 1, 2, 3, 4}; - auto result = urProgramCreateWithBinary(context, device, 5, binary, nullptr, - &program); + const uint8_t *data = binary; + size_t size = 5; + auto result = urProgramCreateWithBinary(context, 1, &device, &size, &data, + nullptr, &program); // The driver is not required to reject the binary ASSERT_TRUE(result == UR_RESULT_ERROR_INVALID_BINARY || result == UR_RESULT_SUCCESS); diff --git a/test/conformance/program/urProgramCreateWithNativeHandle.cpp b/test/conformance/program/urProgramCreateWithNativeHandle.cpp index ddf3767e43..00493d90a8 100644 --- a/test/conformance/program/urProgramCreateWithNativeHandle.cpp +++ b/test/conformance/program/urProgramCreateWithNativeHandle.cpp @@ -9,6 +9,14 @@ struct urProgramCreateWithNativeHandleTest : uur::urProgramTest { void SetUp() override { UUR_RETURN_ON_FATAL_FAILURE(urProgramTest::SetUp()); { + ur_platform_backend_t backend; + ASSERT_SUCCESS(urPlatformGetInfo(platform, UR_PLATFORM_INFO_BACKEND, + sizeof(backend), &backend, + nullptr)); + // For Level Zero we have to build the program to have the native handle. + if (backend == UR_PLATFORM_BACKEND_LEVEL_ZERO) { + ASSERT_SUCCESS(urProgramBuild(context, program, nullptr)); + } UUR_ASSERT_SUCCESS_OR_UNSUPPORTED( urProgramGetNativeHandle(program, &native_program_handle)); } diff --git a/test/conformance/program/urProgramGetBuildInfo.cpp b/test/conformance/program/urProgramGetBuildInfo.cpp index 02faebcfb0..cf4e9b9217 100644 --- a/test/conformance/program/urProgramGetBuildInfo.cpp +++ b/test/conformance/program/urProgramGetBuildInfo.cpp @@ -33,8 +33,19 @@ TEST_P(urProgramGetBuildInfoTest, Success) { auto property_name = getParam(); size_t property_size = 0; std::vector property_value; - ASSERT_SUCCESS(urProgramGetBuildInfo(program, device, property_name, 0, - nullptr, &property_size)); + ur_platform_backend_t backend; + ASSERT_SUCCESS(urPlatformGetInfo(platform, UR_PLATFORM_INFO_BACKEND, + sizeof(backend), &backend, nullptr)); + auto result = urProgramGetBuildInfo(program, device, property_name, 0, + nullptr, &property_size); + + if (property_name == UR_PROGRAM_BUILD_INFO_STATUS && + backend == UR_PLATFORM_BACKEND_LEVEL_ZERO) { + ASSERT_EQ(UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION, result); + return; + } + + ASSERT_SUCCESS(result); property_value.resize(property_size); ASSERT_SUCCESS(urProgramGetBuildInfo(program, device, property_name, property_size, property_value.data(), diff --git a/test/conformance/program/urProgramGetFunctionPointer.cpp b/test/conformance/program/urProgramGetFunctionPointer.cpp index 00f5ad74e0..41d397d527 100644 --- a/test/conformance/program/urProgramGetFunctionPointer.cpp +++ b/test/conformance/program/urProgramGetFunctionPointer.cpp @@ -29,10 +29,18 @@ TEST_P(urProgramGetFunctionPointerTest, Success) { TEST_P(urProgramGetFunctionPointerTest, InvalidKernelName) { void *function_pointer = nullptr; std::string missing_function = "aFakeFunctionName"; - ASSERT_EQ_RESULT(UR_RESULT_ERROR_INVALID_KERNEL_NAME, - urProgramGetFunctionPointer(device, program, - missing_function.data(), - &function_pointer)); + auto result = urProgramGetFunctionPointer( + device, program, missing_function.data(), &function_pointer); + ur_platform_backend_t backend; + ASSERT_SUCCESS(urPlatformGetInfo(platform, UR_PLATFORM_INFO_BACKEND, + sizeof(backend), &backend, nullptr)); + // TODO: level zero backend incorrectly returns UR_RESULT_ERROR_UNSUPPORTED_FEATURE + if (backend == UR_PLATFORM_BACKEND_LEVEL_ZERO) { + ASSERT_EQ(UR_RESULT_ERROR_UNSUPPORTED_FEATURE, result); + } else { + ASSERT_EQ_RESULT(UR_RESULT_ERROR_INVALID_KERNEL_NAME, result); + } + ASSERT_EQ(function_pointer, nullptr); } diff --git a/test/conformance/program/urProgramGetNativeHandle.cpp b/test/conformance/program/urProgramGetNativeHandle.cpp index 05bba697ec..864aa8bd62 100644 --- a/test/conformance/program/urProgramGetNativeHandle.cpp +++ b/test/conformance/program/urProgramGetNativeHandle.cpp @@ -9,6 +9,13 @@ using urProgramGetNativeHandleTest = uur::urProgramTest; UUR_INSTANTIATE_KERNEL_TEST_SUITE_P(urProgramGetNativeHandleTest); TEST_P(urProgramGetNativeHandleTest, Success) { + ur_platform_backend_t backend; + ASSERT_SUCCESS(urPlatformGetInfo(platform, UR_PLATFORM_INFO_BACKEND, + sizeof(backend), &backend, nullptr)); + // For Level Zero we have to build the program to have the native handle. + if (backend == UR_PLATFORM_BACKEND_LEVEL_ZERO) { + ASSERT_SUCCESS(urProgramBuild(context, program, nullptr)); + } ur_native_handle_t native_program_handle = 0; if (auto error = urProgramGetNativeHandle(program, &native_program_handle)) { diff --git a/test/conformance/source/environment.cpp b/test/conformance/source/environment.cpp index ec339a5f40..006ea09b8b 100644 --- a/test/conformance/source/environment.cpp +++ b/test/conformance/source/environment.cpp @@ -569,9 +569,11 @@ ur_result_t KernelsEnvironment::CreateProgram( backend == UR_PLATFORM_BACKEND_CUDA) { // The CUDA and HIP adapters do not support urProgramCreateWithIL so we // need to use urProgramCreateWithBinary instead. + auto size = binary.size(); + auto data = binary.data(); if (auto error = urProgramCreateWithBinary( - hContext, hDevice, binary.size(), - reinterpret_cast(binary.data()), properties, + hContext, 1, &hDevice, &size, + reinterpret_cast(&data), properties, phProgram)) { return error; } diff --git a/test/conformance/testing/include/uur/fixtures.h b/test/conformance/testing/include/uur/fixtures.h index b853164fb6..00bee6ba14 100644 --- a/test/conformance/testing/include/uur/fixtures.h +++ b/test/conformance/testing/include/uur/fixtures.h @@ -1560,6 +1560,45 @@ struct urMultiDeviceQueueTest : urMultiDeviceContextTest { std::vector queues; }; +struct urMultiDeviceProgramTest : urMultiDeviceQueueTest { + void SetUp() override { + UUR_RETURN_ON_FATAL_FAILURE(urMultiDeviceQueueTest::SetUp()); + + ur_platform_backend_t backend; + ASSERT_SUCCESS(urPlatformGetInfo(platform, UR_PLATFORM_INFO_BACKEND, + sizeof(backend), &backend, nullptr)); + // Multi-device programs are not supported for AMD and CUDA + if (backend == UR_PLATFORM_BACKEND_HIP || + backend == UR_PLATFORM_BACKEND_CUDA) { + GTEST_SKIP(); + } + UUR_RETURN_ON_FATAL_FAILURE( + uur::KernelsEnvironment::instance->LoadSource(program_name, + il_binary)); + + const ur_program_properties_t properties = { + UR_STRUCTURE_TYPE_PROGRAM_PROPERTIES, nullptr, + static_cast(metadatas.size()), + metadatas.empty() ? nullptr : metadatas.data()}; + + ASSERT_SUCCESS(urProgramCreateWithIL(context, (*il_binary).data(), + (*il_binary).size(), &properties, + &program)); + } + + void TearDown() override { + if (program) { + EXPECT_SUCCESS(urProgramRelease(program)); + } + UUR_RETURN_ON_FATAL_FAILURE(urMultiDeviceQueueTest::TearDown()); + } + + std::shared_ptr> il_binary; + std::string program_name = "foo"; + ur_program_handle_t program = nullptr; + std::vector metadatas{}; +}; + } // namespace uur #endif // UR_CONFORMANCE_INCLUDE_FIXTURES_H_INCLUDED