From 21684e684847752948bc4cd3d122bd282ad8ba53 Mon Sep 17 00:00:00 2001 From: "Zhao, Yang2" Date: Tue, 3 Dec 2024 23:48:27 -0800 Subject: [PATCH 1/4] fix metadata with assert --- .../loader/layers/sanitizer/asan/asan_ddi.cpp | 41 +- .../sanitizer/asan/asan_interceptor.cpp | 375 +++++++++--------- .../sanitizer/asan/asan_interceptor.hpp | 12 +- 3 files changed, 211 insertions(+), 217 deletions(-) diff --git a/source/loader/layers/sanitizer/asan/asan_ddi.cpp b/source/loader/layers/sanitizer/asan/asan_ddi.cpp index 774ce3a61d..dca3f4bb05 100644 --- a/source/loader/layers/sanitizer/asan/asan_ddi.cpp +++ b/source/loader/layers/sanitizer/asan/asan_ddi.cpp @@ -52,12 +52,6 @@ ur_result_t setupContext(ur_context_handle_t Context, uint32_t numDevices, return UR_RESULT_SUCCESS; } -bool isInstrumentedKernel(ur_kernel_handle_t hKernel) { - auto hProgram = GetProgram(hKernel); - auto PI = getAsanInterceptor()->getProgramInfo(hProgram); - return PI->isKernelInstrumented(hKernel); -} - } // namespace /////////////////////////////////////////////////////////////////////////////// @@ -465,12 +459,6 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueKernelLaunch( getContext()->logger.debug("==== urEnqueueKernelLaunch"); - if (!isInstrumentedKernel(hKernel)) { - return pfnKernelLaunch(hQueue, hKernel, workDim, pGlobalWorkOffset, - pGlobalWorkSize, pLocalWorkSize, - numEventsInWaitList, phEventWaitList, phEvent); - } - USMLaunchInfo LaunchInfo(GetContext(hKernel), GetDevice(hQueue), pGlobalWorkSize, pLocalWorkSize, pGlobalWorkOffset, workDim); @@ -1362,9 +1350,7 @@ __urdlllocal ur_result_t UR_APICALL urKernelCreate( getContext()->logger.debug("==== urKernelCreate"); UR_CALL(pfnCreate(hProgram, pKernelName, phKernel)); - if (isInstrumentedKernel(*phKernel)) { - UR_CALL(getAsanInterceptor()->insertKernel(*phKernel)); - } + UR_CALL(getAsanInterceptor()->insertKernel(*phKernel)); return UR_RESULT_SUCCESS; } @@ -1385,9 +1371,7 @@ __urdlllocal ur_result_t UR_APICALL urKernelRetain( UR_CALL(pfnRetain(hKernel)); auto KernelInfo = getAsanInterceptor()->getKernelInfo(hKernel); - if (KernelInfo) { - KernelInfo->RefCount++; - } + KernelInfo->RefCount++; return UR_RESULT_SUCCESS; } @@ -1407,10 +1391,8 @@ __urdlllocal ur_result_t urKernelRelease( UR_CALL(pfnRelease(hKernel)); auto KernelInfo = getAsanInterceptor()->getKernelInfo(hKernel); - if (KernelInfo) { - if (--KernelInfo->RefCount == 0) { - UR_CALL(getAsanInterceptor()->eraseKernel(hKernel)); - } + if (--KernelInfo->RefCount == 0) { + UR_CALL(getAsanInterceptor()->eraseKernel(hKernel)); } return UR_RESULT_SUCCESS; @@ -1439,8 +1421,8 @@ __urdlllocal ur_result_t UR_APICALL urKernelSetArgValue( std::shared_ptr KernelInfo; if (argSize == sizeof(ur_mem_handle_t) && (MemBuffer = getAsanInterceptor()->getMemBuffer( - *ur_cast(pArgValue))) && - (KernelInfo = getAsanInterceptor()->getKernelInfo(hKernel))) { + *ur_cast(pArgValue)))) { + auto KernelInfo = getAsanInterceptor()->getKernelInfo(hKernel); std::scoped_lock Guard(KernelInfo->Mutex); KernelInfo->BufferArgs[argIndex] = std::move(MemBuffer); } else { @@ -1470,8 +1452,8 @@ __urdlllocal ur_result_t UR_APICALL urKernelSetArgMemObj( std::shared_ptr MemBuffer; std::shared_ptr KernelInfo; - if ((MemBuffer = getAsanInterceptor()->getMemBuffer(hArgValue)) && - (KernelInfo = getAsanInterceptor()->getKernelInfo(hKernel))) { + if ((MemBuffer = getAsanInterceptor()->getMemBuffer(hArgValue))) { + auto KernelInfo = getAsanInterceptor()->getKernelInfo(hKernel); std::scoped_lock Guard(KernelInfo->Mutex); KernelInfo->BufferArgs[argIndex] = std::move(MemBuffer); } else { @@ -1501,7 +1483,8 @@ __urdlllocal ur_result_t UR_APICALL urKernelSetArgLocal( "==== urKernelSetArgLocal (argIndex={}, argSize={})", argIndex, argSize); - if (auto KI = getAsanInterceptor()->getKernelInfo(hKernel)) { + { + auto KI = getAsanInterceptor()->getKernelInfo(hKernel); std::scoped_lock Guard(KI->Mutex); // TODO: get local variable alignment auto argSizeWithRZ = GetSizeAndRedzoneSizeForLocal( @@ -1538,8 +1521,8 @@ __urdlllocal ur_result_t UR_APICALL urKernelSetArgPointer( pArgValue); std::shared_ptr KI; - if (getAsanInterceptor()->getOptions().DetectKernelArguments && - (KI = getAsanInterceptor()->getKernelInfo(hKernel))) { + if (getAsanInterceptor()->getOptions().DetectKernelArguments) { + auto KI = getAsanInterceptor()->getKernelInfo(hKernel); std::scoped_lock Guard(KI->Mutex); KI->PointerArgs[argIndex] = {pArgValue, GetCurrentBacktrace()}; } diff --git a/source/loader/layers/sanitizer/asan/asan_interceptor.cpp b/source/loader/layers/sanitizer/asan/asan_interceptor.cpp index 271d846990..edfd200167 100644 --- a/source/loader/layers/sanitizer/asan/asan_interceptor.cpp +++ b/source/loader/layers/sanitizer/asan/asan_interceptor.cpp @@ -255,9 +255,6 @@ ur_result_t AsanInterceptor::preLaunchKernel(ur_kernel_handle_t Kernel, auto ContextInfo = getContextInfo(Context); auto DeviceInfo = getDeviceInfo(Device); auto KernelInfo = getKernelInfo(Kernel); - assert(KernelInfo && "Kernel should be instrumented"); - - UR_CALL(LaunchInfo.updateKernelInfo(*KernelInfo.get())); ManagedQueue InternalQueue(Context, Device); if (!InternalQueue) { @@ -648,7 +645,13 @@ ur_result_t AsanInterceptor::insertKernel(ur_kernel_handle_t Kernel) { if (m_KernelMap.find(Kernel) != m_KernelMap.end()) { return UR_RESULT_SUCCESS; } - m_KernelMap.emplace(Kernel, std::make_shared(Kernel)); + + auto hProgram = GetProgram(Kernel); + auto PI = getAsanInterceptor()->getProgramInfo(hProgram); + bool IsInstrumented = PI->isKernelInstrumented(Kernel); + + m_KernelMap.emplace(Kernel, + std::make_shared(Kernel, IsInstrumented)); return UR_RESULT_SUCCESS; } @@ -689,204 +692,210 @@ ur_result_t AsanInterceptor::prepareLaunch( std::shared_ptr &ContextInfo, std::shared_ptr &DeviceInfo, ur_queue_handle_t Queue, ur_kernel_handle_t Kernel, USMLaunchInfo &LaunchInfo) { + auto KernelInfo = getKernelInfo(Kernel); - do { - auto KernelInfo = getKernelInfo(Kernel); - assert(KernelInfo && "Kernel should be instrumented"); - - // Validate pointer arguments - if (getOptions().DetectKernelArguments) { - for (const auto &[ArgIndex, PtrPair] : KernelInfo->PointerArgs) { - auto Ptr = PtrPair.first; - if (Ptr == nullptr) { - continue; - } - if (auto ValidateResult = ValidateUSMPointer( - ContextInfo->Handle, DeviceInfo->Handle, (uptr)Ptr)) { - ReportInvalidKernelArgument(Kernel, ArgIndex, (uptr)Ptr, - ValidateResult, PtrPair.second); - exitWithErrors(); - } + auto ArgNums = GetKernelNumArgs(Kernel); + auto LocalMemoryUsage = + GetKernelLocalMemorySize(Kernel, DeviceInfo->Handle); + auto PrivateMemoryUsage = + GetKernelPrivateMemorySize(Kernel, DeviceInfo->Handle); + + getContext()->logger.info( + "KernelInfo {} (Name={}, ArgNums={}, IsInstrumented={}, " + "LocalMemory={}, PrivateMemory={})", + (void *)Kernel, GetKernelName(Kernel), ArgNums, + KernelInfo->IsInstrumented, LocalMemoryUsage, PrivateMemoryUsage); + + // Validate pointer arguments + if (getOptions().DetectKernelArguments) { + for (const auto &[ArgIndex, PtrPair] : KernelInfo->PointerArgs) { + auto Ptr = PtrPair.first; + if (Ptr == nullptr) { + continue; } - } - - // Set membuffer arguments - for (const auto &[ArgIndex, MemBuffer] : KernelInfo->BufferArgs) { - char *ArgPointer = nullptr; - UR_CALL(MemBuffer->getHandle(DeviceInfo->Handle, ArgPointer)); - ur_result_t URes = getContext()->urDdiTable.Kernel.pfnSetArgPointer( - Kernel, ArgIndex, nullptr, ArgPointer); - if (URes != UR_RESULT_SUCCESS) { - getContext()->logger.error( - "Failed to set buffer {} as the {} arg to kernel {}: {}", - ur_cast(MemBuffer.get()), ArgIndex, Kernel, - URes); + if (auto ValidateResult = ValidateUSMPointer( + ContextInfo->Handle, DeviceInfo->Handle, (uptr)Ptr)) { + ReportInvalidKernelArgument(Kernel, ArgIndex, (uptr)Ptr, + ValidateResult, PtrPair.second); + exitWithErrors(); } } + } - // Set launch info argument - auto ArgNums = GetKernelNumArgs(Kernel); - if (ArgNums) { - getContext()->logger.debug( - "launch_info {} (numLocalArgs={}, localArgs={})", - (void *)LaunchInfo.Data, LaunchInfo.Data->NumLocalArgs, - (void *)LaunchInfo.Data->LocalArgs); - ur_result_t URes = getContext()->urDdiTable.Kernel.pfnSetArgPointer( - Kernel, ArgNums - 1, nullptr, LaunchInfo.Data); - if (URes != UR_RESULT_SUCCESS) { - getContext()->logger.error("Failed to set launch info: {}", - URes); - return URes; - } + // Set membuffer arguments + for (const auto &[ArgIndex, MemBuffer] : KernelInfo->BufferArgs) { + char *ArgPointer = nullptr; + UR_CALL(MemBuffer->getHandle(DeviceInfo->Handle, ArgPointer)); + ur_result_t URes = getContext()->urDdiTable.Kernel.pfnSetArgPointer( + Kernel, ArgIndex, nullptr, ArgPointer); + if (URes != UR_RESULT_SUCCESS) { + getContext()->logger.error( + "Failed to set buffer {} as the {} arg to kernel {}: {}", + ur_cast(MemBuffer.get()), ArgIndex, Kernel, + URes); + return URes; } + } - LaunchInfo.Data->GlobalShadowOffset = DeviceInfo->Shadow->ShadowBegin; - LaunchInfo.Data->GlobalShadowOffsetEnd = DeviceInfo->Shadow->ShadowEnd; - LaunchInfo.Data->DeviceTy = DeviceInfo->Type; - LaunchInfo.Data->Debug = getOptions().Debug ? 1 : 0; - - if (LaunchInfo.LocalWorkSize.empty()) { - LaunchInfo.LocalWorkSize.resize(LaunchInfo.WorkDim); - auto URes = - getContext()->urDdiTable.Kernel.pfnGetSuggestedLocalWorkSize( - Kernel, Queue, LaunchInfo.WorkDim, - LaunchInfo.GlobalWorkOffset, LaunchInfo.GlobalWorkSize, - LaunchInfo.LocalWorkSize.data()); - if (URes != UR_RESULT_SUCCESS) { - if (URes != UR_RESULT_ERROR_UNSUPPORTED_FEATURE) { - return URes; - } - // If urKernelGetSuggestedLocalWorkSize is not supported by driver, we fallback - // to inefficient implementation - for (size_t Dim = 0; Dim < LaunchInfo.WorkDim; ++Dim) { - LaunchInfo.LocalWorkSize[Dim] = 1; - } - } - } + if (!KernelInfo->IsInstrumented) { + return UR_RESULT_SUCCESS; + } - const size_t *LocalWorkSize = LaunchInfo.LocalWorkSize.data(); - uint32_t NumWG = 1; - for (uint32_t Dim = 0; Dim < LaunchInfo.WorkDim; ++Dim) { - NumWG *= (LaunchInfo.GlobalWorkSize[Dim] + LocalWorkSize[Dim] - 1) / - LocalWorkSize[Dim]; + // Set launch info argument + { + assert(ArgNums >= 1 && + "Sanitized Kernel should have at least one argument"); + ur_result_t URes = getContext()->urDdiTable.Kernel.pfnSetArgPointer( + Kernel, ArgNums - 1, nullptr, LaunchInfo.Data); + if (URes != UR_RESULT_SUCCESS) { + getContext()->logger.error("Failed to set launch info: {}", URes); + return URes; } + } - auto EnqueueAllocateShadowMemory = [Context = ContextInfo->Handle, - Device = DeviceInfo->Handle, - Queue](size_t Size, uptr &Ptr) { - void *Allocated = nullptr; - auto URes = getContext()->urDdiTable.USM.pfnDeviceAlloc( - Context, Device, nullptr, nullptr, Size, &Allocated); - if (URes != UR_RESULT_SUCCESS) { + UR_CALL(LaunchInfo.updateKernelInfo(*KernelInfo.get())); + + LaunchInfo.Data->GlobalShadowOffset = DeviceInfo->Shadow->ShadowBegin; + LaunchInfo.Data->GlobalShadowOffsetEnd = DeviceInfo->Shadow->ShadowEnd; + LaunchInfo.Data->DeviceTy = DeviceInfo->Type; + LaunchInfo.Data->Debug = getOptions().Debug ? 1 : 0; + + getContext()->logger.info( + "LaunchInfo {} (device={}, debug={}, numLocalArgs={}, localArgs={})", + (void *)LaunchInfo.Data, ToString(LaunchInfo.Data->DeviceTy), + LaunchInfo.Data->Debug, LaunchInfo.Data->NumLocalArgs, + (void *)LaunchInfo.Data->LocalArgs); + + // urKernelGetSuggestedLocalWorkSize must be called after urKernelSetArgPointer + if (LaunchInfo.LocalWorkSize.empty()) { + LaunchInfo.LocalWorkSize.resize(LaunchInfo.WorkDim); + auto URes = + getContext()->urDdiTable.Kernel.pfnGetSuggestedLocalWorkSize( + Kernel, Queue, LaunchInfo.WorkDim, LaunchInfo.GlobalWorkOffset, + LaunchInfo.GlobalWorkSize, LaunchInfo.LocalWorkSize.data()); + if (URes != UR_RESULT_SUCCESS) { + if (URes != UR_RESULT_ERROR_UNSUPPORTED_FEATURE) { return URes; } - // Initialize shadow memory - URes = EnqueueUSMBlockingSet(Queue, Allocated, 0, Size); - if (URes != UR_RESULT_SUCCESS) { - [[maybe_unused]] auto URes = - getContext()->urDdiTable.USM.pfnFree(Context, Allocated); - assert(URes == UR_RESULT_SUCCESS && - "urUSMFree failed at allocating shadow memory"); - Allocated = nullptr; + // If urKernelGetSuggestedLocalWorkSize is not supported by driver, we fallback + // to inefficient implementation + for (size_t Dim = 0; Dim < LaunchInfo.WorkDim; ++Dim) { + LaunchInfo.LocalWorkSize[Dim] = 1; } - Ptr = (uptr)Allocated; + } + } + + const size_t *LocalWorkSize = LaunchInfo.LocalWorkSize.data(); + uint32_t NumWG = 1; + for (uint32_t Dim = 0; Dim < LaunchInfo.WorkDim; ++Dim) { + NumWG *= (LaunchInfo.GlobalWorkSize[Dim] + LocalWorkSize[Dim] - 1) / + LocalWorkSize[Dim]; + } + + auto EnqueueAllocateShadowMemory = [Context = ContextInfo->Handle, + Device = DeviceInfo->Handle, + Queue](size_t Size, uptr &Ptr) { + void *Allocated = nullptr; + auto URes = getContext()->urDdiTable.USM.pfnDeviceAlloc( + Context, Device, nullptr, nullptr, Size, &Allocated); + if (URes != UR_RESULT_SUCCESS) { return URes; - }; - - auto LocalMemoryUsage = - GetKernelLocalMemorySize(Kernel, DeviceInfo->Handle); - auto PrivateMemoryUsage = - GetKernelPrivateMemorySize(Kernel, DeviceInfo->Handle); - - getContext()->logger.info( - "KernelInfo {} (LocalMemory={}, PrivateMemory={})", (void *)Kernel, - LocalMemoryUsage, PrivateMemoryUsage); - - // Write shadow memory offset for local memory - if (getOptions().DetectLocals) { - // CPU needn't this - if (DeviceInfo->Type == DeviceType::GPU_PVC || - DeviceInfo->Type == DeviceType::GPU_DG2) { - const size_t LocalMemorySize = - GetDeviceLocalMemorySize(DeviceInfo->Handle); - const size_t LocalShadowMemorySize = - (NumWG * LocalMemorySize) >> ASAN_SHADOW_SCALE; - - getContext()->logger.debug( - "LocalMemory(WorkGroup={}, LocalMemorySize={}, " - "LocalShadowMemorySize={})", - NumWG, LocalMemorySize, LocalShadowMemorySize); - - if (EnqueueAllocateShadowMemory( - LocalShadowMemorySize, - LaunchInfo.Data->LocalShadowOffset) != - UR_RESULT_SUCCESS) { - getContext()->logger.warning( - "Failed to allocate shadow memory for local " - "memory, maybe the number of workgroup ({}) is too " - "large", - NumWG); - getContext()->logger.warning( - "Skip checking local memory of kernel <{}>", - GetKernelName(Kernel)); - } else { - LaunchInfo.Data->LocalShadowOffsetEnd = - LaunchInfo.Data->LocalShadowOffset + - LocalShadowMemorySize - 1; - - ContextInfo->Stats.UpdateShadowMalloced( - LocalShadowMemorySize); - - getContext()->logger.info( - "ShadowMemory(Local, {} - {})", - (void *)LaunchInfo.Data->LocalShadowOffset, - (void *)LaunchInfo.Data->LocalShadowOffsetEnd); - } + } + // Initialize shadow memory + URes = EnqueueUSMBlockingSet(Queue, Allocated, 0, Size); + if (URes != UR_RESULT_SUCCESS) { + [[maybe_unused]] auto URes = + getContext()->urDdiTable.USM.pfnFree(Context, Allocated); + assert(URes == UR_RESULT_SUCCESS && + "urUSMFree failed at allocating shadow memory"); + Allocated = nullptr; + } + Ptr = (uptr)Allocated; + return URes; + }; + + // Write shadow memory offset for local memory + if (getOptions().DetectLocals) { + // CPU needn't this + if (DeviceInfo->Type == DeviceType::GPU_PVC || + DeviceInfo->Type == DeviceType::GPU_DG2) { + const size_t LocalMemorySize = + GetDeviceLocalMemorySize(DeviceInfo->Handle); + const size_t LocalShadowMemorySize = + (NumWG * LocalMemorySize) >> ASAN_SHADOW_SCALE; + + getContext()->logger.debug( + "LocalMemory(WorkGroup={}, LocalMemorySize={}, " + "LocalShadowMemorySize={})", + NumWG, LocalMemorySize, LocalShadowMemorySize); + + if (EnqueueAllocateShadowMemory( + LocalShadowMemorySize, + LaunchInfo.Data->LocalShadowOffset) != UR_RESULT_SUCCESS) { + getContext()->logger.warning( + "Failed to allocate shadow memory for local " + "memory, maybe the number of workgroup ({}) is too " + "large", + NumWG); + getContext()->logger.warning( + "Skip checking local memory of kernel <{}>", + GetKernelName(Kernel)); + } else { + LaunchInfo.Data->LocalShadowOffsetEnd = + LaunchInfo.Data->LocalShadowOffset + LocalShadowMemorySize - + 1; + + ContextInfo->Stats.UpdateShadowMalloced(LocalShadowMemorySize); + + getContext()->logger.info( + "ShadowMemory(Local, {} - {})", + (void *)LaunchInfo.Data->LocalShadowOffset, + (void *)LaunchInfo.Data->LocalShadowOffsetEnd); } } + } - // Write shadow memory offset for private memory - if (getOptions().DetectPrivates) { - if (DeviceInfo->Type == DeviceType::CPU) { - LaunchInfo.Data->PrivateShadowOffset = - DeviceInfo->Shadow->ShadowBegin; - } else if (DeviceInfo->Type == DeviceType::GPU_PVC || - DeviceInfo->Type == DeviceType::GPU_DG2) { - const size_t PrivateShadowMemorySize = - (NumWG * ASAN_PRIVATE_SIZE) >> ASAN_SHADOW_SCALE; - - getContext()->logger.debug("PrivateMemory(WorkGroup={}, " - "PrivateShadowMemorySize={})", - NumWG, PrivateShadowMemorySize); - - if (EnqueueAllocateShadowMemory( - PrivateShadowMemorySize, - LaunchInfo.Data->PrivateShadowOffset) != - UR_RESULT_SUCCESS) { - getContext()->logger.warning( - "Failed to allocate shadow memory for private " - "memory, maybe the number of workgroup ({}) is too " - "large", - NumWG); - getContext()->logger.warning( - "Skip checking private memory of kernel <{}>", - GetKernelName(Kernel)); - } else { - LaunchInfo.Data->PrivateShadowOffsetEnd = - LaunchInfo.Data->PrivateShadowOffset + - PrivateShadowMemorySize - 1; - - ContextInfo->Stats.UpdateShadowMalloced( - PrivateShadowMemorySize); - - getContext()->logger.info( - "ShadowMemory(Private, {} - {})", - (void *)LaunchInfo.Data->PrivateShadowOffset, - (void *)LaunchInfo.Data->PrivateShadowOffsetEnd); - } + // Write shadow memory offset for private memory + if (getOptions().DetectPrivates) { + if (DeviceInfo->Type == DeviceType::CPU) { + LaunchInfo.Data->PrivateShadowOffset = + DeviceInfo->Shadow->ShadowBegin; + } else if (DeviceInfo->Type == DeviceType::GPU_PVC || + DeviceInfo->Type == DeviceType::GPU_DG2) { + const size_t PrivateShadowMemorySize = + (NumWG * ASAN_PRIVATE_SIZE) >> ASAN_SHADOW_SCALE; + + getContext()->logger.debug("PrivateMemory(WorkGroup={}, " + "PrivateShadowMemorySize={})", + NumWG, PrivateShadowMemorySize); + + if (EnqueueAllocateShadowMemory( + PrivateShadowMemorySize, + LaunchInfo.Data->PrivateShadowOffset) != + UR_RESULT_SUCCESS) { + getContext()->logger.warning( + "Failed to allocate shadow memory for private " + "memory, maybe the number of workgroup ({}) is too " + "large", + NumWG); + getContext()->logger.warning( + "Skip checking private memory of kernel <{}>", + GetKernelName(Kernel)); + } else { + LaunchInfo.Data->PrivateShadowOffsetEnd = + LaunchInfo.Data->PrivateShadowOffset + + PrivateShadowMemorySize - 1; + + ContextInfo->Stats.UpdateShadowMalloced( + PrivateShadowMemorySize); + + getContext()->logger.info( + "ShadowMemory(Private, {} - {})", + (void *)LaunchInfo.Data->PrivateShadowOffset, + (void *)LaunchInfo.Data->PrivateShadowOffsetEnd); } } - } while (false); + } return UR_RESULT_SUCCESS; } diff --git a/source/loader/layers/sanitizer/asan/asan_interceptor.hpp b/source/loader/layers/sanitizer/asan/asan_interceptor.hpp index 926be1388e..d24be1e1f2 100644 --- a/source/loader/layers/sanitizer/asan/asan_interceptor.hpp +++ b/source/loader/layers/sanitizer/asan/asan_interceptor.hpp @@ -85,6 +85,9 @@ struct KernelInfo { ur_kernel_handle_t Handle; std::atomic RefCount = 1; + // sanitized kernel + bool IsInstrumented = false; + // lock this mutex if following fields are accessed ur_shared_mutex Mutex; std::unordered_map> BufferArgs; @@ -94,7 +97,8 @@ struct KernelInfo { // Need preserve the order of local arguments std::map LocalArgs; - explicit KernelInfo(ur_kernel_handle_t Kernel) : Handle(Kernel) { + explicit KernelInfo(ur_kernel_handle_t Kernel, bool IsInstrumented) + : Handle(Kernel), IsInstrumented(IsInstrumented) { [[maybe_unused]] auto Result = getContext()->urDdiTable.Kernel.pfnRetain(Kernel); assert(Result == UR_RESULT_SUCCESS); @@ -272,10 +276,8 @@ class AsanInterceptor { std::shared_ptr getKernelInfo(ur_kernel_handle_t Kernel) { std::shared_lock Guard(m_KernelMapMutex); - if (m_KernelMap.find(Kernel) != m_KernelMap.end()) { - return m_KernelMap[Kernel]; - } - return nullptr; + assert(m_KernelMap.find(Kernel) != m_KernelMap.end()); + return m_KernelMap[Kernel]; } const AsanOptions &getOptions() { return m_Options; } From 75a82aa9250d6609dacb18c0b887f19f3f76b5bc Mon Sep 17 00:00:00 2001 From: "Zhao, Yang2" Date: Fri, 13 Dec 2024 08:09:54 +0100 Subject: [PATCH 2/4] fix build --- source/loader/layers/sanitizer/asan/asan_interceptor.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/source/loader/layers/sanitizer/asan/asan_interceptor.cpp b/source/loader/layers/sanitizer/asan/asan_interceptor.cpp index ad3beda007..fb17b0a7f5 100644 --- a/source/loader/layers/sanitizer/asan/asan_interceptor.cpp +++ b/source/loader/layers/sanitizer/asan/asan_interceptor.cpp @@ -836,8 +836,9 @@ ur_result_t AsanInterceptor::prepareLaunch( getContext()->logger.info( "LaunchInfo {} (device={}, debug={}, numLocalArgs={}, localArgs={})", - (void *)LaunchInfo.Data.getDevicePtr(), LaunchInfo.Data.Host.DeviceTy, - LaunchInfo.Data.Host.Debug, LaunchInfo.Data.Host.NumLocalArgs, + (void *)LaunchInfo.Data.getDevicePtr(), + ToString(LaunchInfo.Data.Host.DeviceTy), LaunchInfo.Data.Host.Debug, + LaunchInfo.Data.Host.NumLocalArgs, (void *)LaunchInfo.Data.Host.LocalArgs); return UR_RESULT_SUCCESS; From d05b5d5b5bf887c0eae98301e965ab55ca158531 Mon Sep 17 00:00:00 2001 From: "Zhao, Yang2" Date: Mon, 16 Dec 2024 09:55:18 +0100 Subject: [PATCH 3/4] init LaunchInfo --- source/loader/CMakeLists.txt | 1 + source/loader/layers/sanitizer/asan/asan_ddi.cpp | 1 + source/loader/layers/sanitizer/asan/asan_libdevice.hpp | 2 +- 3 files changed, 3 insertions(+), 1 deletion(-) diff --git a/source/loader/CMakeLists.txt b/source/loader/CMakeLists.txt index d8f6056ae9..a10e99f422 100644 --- a/source/loader/CMakeLists.txt +++ b/source/loader/CMakeLists.txt @@ -136,6 +136,7 @@ if(UR_ENABLE_SANITIZER) ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/asan/asan_buffer.cpp ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/asan/asan_buffer.hpp ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/asan/asan_ddi.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/asan/asan_ddi.hpp ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/asan/asan_interceptor.cpp ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/asan/asan_interceptor.hpp ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/asan/asan_libdevice.hpp diff --git a/source/loader/layers/sanitizer/asan/asan_ddi.cpp b/source/loader/layers/sanitizer/asan/asan_ddi.cpp index 380b51a0da..9378544d65 100644 --- a/source/loader/layers/sanitizer/asan/asan_ddi.cpp +++ b/source/loader/layers/sanitizer/asan/asan_ddi.cpp @@ -464,6 +464,7 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueKernelLaunch( LaunchInfo LaunchInfo(GetContext(hQueue), GetDevice(hQueue), pGlobalWorkSize, pLocalWorkSize, pGlobalWorkOffset, workDim); + UR_CALL(LaunchInfo.Data.syncToDevice(hQueue)); UR_CALL(getAsanInterceptor()->preLaunchKernel(hKernel, hQueue, LaunchInfo)); diff --git a/source/loader/layers/sanitizer/asan/asan_libdevice.hpp b/source/loader/layers/sanitizer/asan/asan_libdevice.hpp index a2d5ecd6be..4c6aaaeac8 100644 --- a/source/loader/layers/sanitizer/asan/asan_libdevice.hpp +++ b/source/loader/layers/sanitizer/asan/asan_libdevice.hpp @@ -66,7 +66,7 @@ struct AsanRuntimeData { uint32_t Debug = 0; int ReportFlag = 0; - AsanErrorReport Report[ASAN_MAX_NUM_REPORTS]; + AsanErrorReport Report[ASAN_MAX_NUM_REPORTS] = {}; }; constexpr unsigned ASAN_SHADOW_SCALE = 4; From 390d0b556d67cb31abb87854016df4ca86d18ea8 Mon Sep 17 00:00:00 2001 From: "Zhao, Yang2" Date: Mon, 16 Dec 2024 09:59:01 +0100 Subject: [PATCH 4/4] remove unused var --- source/loader/layers/sanitizer/asan/asan_ddi.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/source/loader/layers/sanitizer/asan/asan_ddi.cpp b/source/loader/layers/sanitizer/asan/asan_ddi.cpp index 9378544d65..bf4dff157a 100644 --- a/source/loader/layers/sanitizer/asan/asan_ddi.cpp +++ b/source/loader/layers/sanitizer/asan/asan_ddi.cpp @@ -1420,7 +1420,6 @@ __urdlllocal ur_result_t UR_APICALL urKernelSetArgValue( getContext()->logger.debug("==== urKernelSetArgValue"); std::shared_ptr MemBuffer; - std::shared_ptr KernelInfo; if (argSize == sizeof(ur_mem_handle_t) && (MemBuffer = getAsanInterceptor()->getMemBuffer( *ur_cast(pArgValue)))) { @@ -1453,7 +1452,6 @@ __urdlllocal ur_result_t UR_APICALL urKernelSetArgMemObj( getContext()->logger.debug("==== urKernelSetArgMemObj"); std::shared_ptr MemBuffer; - std::shared_ptr KernelInfo; if ((MemBuffer = getAsanInterceptor()->getMemBuffer(hArgValue))) { auto KernelInfo = getAsanInterceptor()->getKernelInfo(hKernel); std::scoped_lock Guard(KernelInfo->Mutex);