From a3b708272b60ae0ce1915a1d17f659ff27a7c766 Mon Sep 17 00:00:00 2001 From: Igor Chorazewicz Date: Tue, 10 Dec 2024 20:31:33 +0000 Subject: [PATCH 1/2] Bump UMF to main --- source/common/CMakeLists.txt | 11 ++++++----- source/common/umf_helpers.hpp | 2 +- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/source/common/CMakeLists.txt b/source/common/CMakeLists.txt index b5fb64cfc5..6d7144fa1c 100644 --- a/source/common/CMakeLists.txt +++ b/source/common/CMakeLists.txt @@ -28,14 +28,15 @@ target_include_directories(ur_common PUBLIC message(STATUS "Download Unified Memory Framework from github.com") if (NOT DEFINED UMF_REPO) - set(UMF_REPO "https://github.com/oneapi-src/unified-memory-framework.git") + set(UMF_REPO "https://github.com/igchor/unified-memory-framework.git") endif() if (NOT DEFINED UMF_TAG) - # tag v0.10.0 - # Tagger: Łukasz Stolarczuk - # Date: Mon Dec 9 17:01:43 2024 +0100 - set(UMF_TAG v0.10.0) + # commit f160016e40080e31f098984cb8d99d412806e966 + # Merge: 625a9d4 a094b44 + # Author: Łukasz Stolarczuk + # Date: Tue Dec 10 12:59:44 2024 +0100 + set(UMF_TAG 477edb6e444ae3d34b9e2d674df42df71d637c2b) endif() message(STATUS "Will fetch Unified Memory Framework from ${UMF_REPO}") diff --git a/source/common/umf_helpers.hpp b/source/common/umf_helpers.hpp index d067b8ab1a..4b7a4a7b6f 100644 --- a/source/common/umf_helpers.hpp +++ b/source/common/umf_helpers.hpp @@ -168,7 +168,7 @@ auto memoryProviderMakeUnique(Args &&...args) { UMF_ASSIGN_OP(ops, T, get_recommended_page_size, UMF_RESULT_ERROR_UNKNOWN); UMF_ASSIGN_OP(ops, T, get_min_page_size, UMF_RESULT_ERROR_UNKNOWN); UMF_ASSIGN_OP(ops, T, get_name, ""); - UMF_ASSIGN_OP(ops.ext, T, free, UMF_RESULT_ERROR_UNKNOWN); + UMF_ASSIGN_OP(ops, T, free, UMF_RESULT_ERROR_UNKNOWN); UMF_ASSIGN_OP(ops.ext, T, purge_lazy, UMF_RESULT_ERROR_UNKNOWN); UMF_ASSIGN_OP(ops.ext, T, purge_force, UMF_RESULT_ERROR_UNKNOWN); UMF_ASSIGN_OP(ops.ext, T, allocation_merge, UMF_RESULT_ERROR_UNKNOWN); From 8a54abd4ce25b5dec0fc3775f06d15db3182424a Mon Sep 17 00:00:00 2001 From: Igor Chorazewicz Date: Thu, 12 Dec 2024 19:43:07 +0000 Subject: [PATCH 2/2] [CTS] add test for deadlock when using urUSMFree --- source/adapters/level_zero/v2/usm.cpp | 6 + test/conformance/device_code/CMakeLists.txt | 1 + test/conformance/device_code/atomic_wait.cpp | 28 ++++ .../enqueue/urEnqueueKernelLaunch.cpp | 144 ++++++++++++++++++ 4 files changed, 179 insertions(+) create mode 100644 test/conformance/device_code/atomic_wait.cpp diff --git a/source/adapters/level_zero/v2/usm.cpp b/source/adapters/level_zero/v2/usm.cpp index f7396e282f..c9c852feef 100644 --- a/source/adapters/level_zero/v2/usm.cpp +++ b/source/adapters/level_zero/v2/usm.cpp @@ -129,6 +129,12 @@ makePool(usm::umf_disjoint_pool_config_t *poolParams, } } + umf_ret = umfLevelZeroMemoryProviderParamsSetFreePolicy( + params, UMF_LEVEL_ZERO_MEMORY_PROVIDER_FREE_POLICY_DEFER_FREE); + if (umf_ret != UMF_RESULT_SUCCESS) { + throw umf::umf2urResult(umf_ret); + } + auto [ret, provider] = umf::providerMakeUniqueFromOps(umfLevelZeroMemoryProviderOps(), params); if (ret != UMF_RESULT_SUCCESS) { diff --git a/test/conformance/device_code/CMakeLists.txt b/test/conformance/device_code/CMakeLists.txt index 1621b01544..6e295ab2f5 100644 --- a/test/conformance/device_code/CMakeLists.txt +++ b/test/conformance/device_code/CMakeLists.txt @@ -135,6 +135,7 @@ macro(add_device_binary SOURCE_FILE) list(APPEND DEVICE_CODE_SOURCES ${SOURCE_FILE}) endmacro() +add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/atomic_wait.cpp) add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/bar.cpp) add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/device_global.cpp) add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/fill.cpp) diff --git a/test/conformance/device_code/atomic_wait.cpp b/test/conformance/device_code/atomic_wait.cpp new file mode 100644 index 0000000000..2893362ad4 --- /dev/null +++ b/test/conformance/device_code/atomic_wait.cpp @@ -0,0 +1,28 @@ +// Copyright (C) 2024 Intel Corporation +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. +// See LICENSE.TXT +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#include + +template +using global_atomic_ref = + sycl::atomic_ref; + +int main() { + sycl::queue deviceQueue; + + auto atomic_cnt = sycl::malloc_shared(1, deviceQueue); + + auto e1 = deviceQueue.submit([&](sycl::handler &cgh) { + cgh.single_task([=]() { + global_atomic_ref atomic(*atomic_cnt); + while (atomic.load() == 0) { + } + }); + }); + e1.wait(); + + return 0; +} diff --git a/test/conformance/enqueue/urEnqueueKernelLaunch.cpp b/test/conformance/enqueue/urEnqueueKernelLaunch.cpp index 7ffa072466..d0ed82dfca 100644 --- a/test/conformance/enqueue/urEnqueueKernelLaunch.cpp +++ b/test/conformance/enqueue/urEnqueueKernelLaunch.cpp @@ -5,6 +5,9 @@ #include #include +#include + +#include "helpers.h" struct urEnqueueKernelLaunchTest : uur::urKernelExecutionTest { void SetUp() override { @@ -606,3 +609,144 @@ TEST_P(urEnqueueKernelLaunchUSMLinkedList, Success) { list_cur = list_cur->next; } } + +struct urTwoQueueLaunchBlockingFree : uur::urMultiQueueMultiDeviceTest { + std::string KernelName; + std::vector programs; + std::vector kernels; + std::vector SharedMem; + + static constexpr char ProgramName[] = "atomic_wait"; + + void SetUp() override { + if (uur::KernelsEnvironment::instance->devices.size() < 2) { + GTEST_SKIP() << "This test requires at least 2 devices."; + } + + // ur_platform_backend_t backend; + // ASSERT_SUCCESS(urPlatformGetInfo(platform, UR_PLATFORM_INFO_BACKEND, + // sizeof(ur_platform_backend_t), + // &backend, nullptr)); + + // if (backend == UR_PLATFORM_BACKEND_OPENCL) { + // GTEST_FAIL() << "TODO: this test fails on OPENCL backend"; + // } + + for (auto &device : uur::KernelsEnvironment::instance->devices) { + ur_device_usm_access_capability_flags_t shared_usm_flags = 0; + ASSERT_SUCCESS( + uur::GetDeviceUSMSingleSharedSupport(device, shared_usm_flags)); + if (!(shared_usm_flags & + UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_ACCESS)) { + GTEST_SKIP() << "Cross Device USM is not supported."; + } + if (!(shared_usm_flags & + UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_ATOMIC_ACCESS)) { + GTEST_SKIP() << "Atomic USM is not supported."; + } + } + + std::vector devs; + devs.push_back(uur::KernelsEnvironment::instance->devices[0]); + devs.push_back(uur::KernelsEnvironment::instance->devices[1]); + UUR_RETURN_ON_FATAL_FAILURE( + uur::urMultiQueueMultiDeviceTest::SetUp(devs, 2)); + + programs.resize(devices.size()); + kernels.resize(devices.size()); + SharedMem.resize(devices.size()); + + KernelName = uur::KernelsEnvironment::instance->GetEntryPointNames( + ProgramName)[0]; + + std::shared_ptr> il_binary; + std::vector metadatas{}; + + uur::KernelsEnvironment::instance->LoadSource(ProgramName, il_binary); + + for (size_t i = 0; i < devices.size(); i++) { + const ur_program_properties_t properties = { + UR_STRUCTURE_TYPE_PROGRAM_PROPERTIES, nullptr, + static_cast(metadatas.size()), + metadatas.empty() ? nullptr : metadatas.data()}; + + uur::raii::Program program; + ASSERT_SUCCESS(uur::KernelsEnvironment::instance->CreateProgram( + platform, context, devices[i], *il_binary, &properties, + &programs[i])); + + UUR_ASSERT_SUCCESS_OR_UNSUPPORTED( + urProgramBuild(context, programs[i], nullptr)); + ASSERT_SUCCESS( + urKernelCreate(programs[i], KernelName.data(), &kernels[i])); + + ASSERT_SUCCESS(urUSMSharedAlloc(context, devices[i], nullptr, + nullptr, sizeof(uint64_t), + &SharedMem[i])); + ASSERT_NE(SharedMem[i], nullptr); + + uint64_t pattern = 0; + ASSERT_SUCCESS(urEnqueueUSMFill( + queues[i], SharedMem[i], sizeof(uint64_t), &pattern, + sizeof(uint64_t), 0, nullptr, nullptr)); + ASSERT_SUCCESS(urQueueFinish(queues[i])); + + ASSERT_SUCCESS( + urKernelSetArgPointer(kernels[i], 0, nullptr, SharedMem[i])); + } + } + + void TearDown() override { + for (auto &Ptr : SharedMem) { + urUSMFree(context, Ptr); + } + for (const auto &kernel : kernels) { + urKernelRelease(kernel); + } + for (const auto &program : programs) { + urProgramRelease(program); + } + UUR_RETURN_ON_FATAL_FAILURE( + uur::urMultiDeviceContextTestTemplate<1>::TearDown()); + } +}; + +TEST_F(urTwoQueueLaunchBlockingFree, FreeDoesNotDeadlock) { + constexpr size_t global_offset = 0; + static constexpr size_t global_size = 1; + + auto signalKernel = [&](size_t i) { + // use different device for signaling to avoid a deadlock + auto signalQueue = queues[(i + 1) % queues.size()]; + + uint64_t pattern = 1; + ASSERT_SUCCESS(urEnqueueUSMFill(signalQueue, SharedMem[i], + sizeof(uint64_t), &pattern, + sizeof(uint64_t), 0, nullptr, nullptr)); + ASSERT_SUCCESS(urQueueFinish(signalQueue)); + }; + + ur_event_handle_t k1Executed; + + // do not block the first kernel + signalKernel(0); + ASSERT_SUCCESS(urEnqueueKernelLaunch(queues[0], kernels[0], 1, + &global_offset, &global_size, nullptr, + 0, nullptr, &k1Executed)); + + // launch second kernel on the same device (devices[2] == devices[0]), + // this one will spin until we signal it + assert(devices[0] == devices[2]); + ASSERT_SUCCESS(urEnqueueKernelLaunch(queues[2], kernels[2], 1, + &global_offset, &global_size, nullptr, + 0, nullptr, nullptr)); + + ASSERT_SUCCESS(urEventWait(1, &k1Executed)); + ASSERT_SUCCESS(urUSMFree(context, SharedMem[0])); + SharedMem[0] = 0; + + signalKernel(2); + + ASSERT_SUCCESS(urQueueFinish(queues[0])); + ASSERT_SUCCESS(urQueueFinish(queues[2])); +}