diff --git a/source/loader/layers/sanitizer/msan/msan_buffer.cpp b/source/loader/layers/sanitizer/msan/msan_buffer.cpp index 66ebb10326..b93fd5ed9a 100644 --- a/source/loader/layers/sanitizer/msan/msan_buffer.cpp +++ b/source/loader/layers/sanitizer/msan/msan_buffer.cpp @@ -93,7 +93,7 @@ ur_result_t MemBuffer::getHandle(ur_device_handle_t Device, char *&Handle) { USMDesc.align = getAlignment(); ur_usm_pool_handle_t Pool{}; URes = getMsanInterceptor()->allocateMemory( - Context, Device, &USMDesc, Pool, Size, + Context, Device, &USMDesc, Pool, Size, AllocType::DEVICE_USM, ur_cast(&Allocation)); if (URes != UR_RESULT_SUCCESS) { getContext()->logger.error( @@ -130,8 +130,8 @@ ur_result_t MemBuffer::getHandle(ur_device_handle_t Device, char *&Handle) { ur_usm_desc_t USMDesc{}; USMDesc.align = getAlignment(); ur_usm_pool_handle_t Pool{}; - URes = getMsanInterceptor()->allocateMemory( - Context, nullptr, &USMDesc, Pool, Size, + URes = getContext()->urDdiTable.USM.pfnHostAlloc( + Context, &USMDesc, Pool, Size, ur_cast(&HostAllocation)); if (URes != UR_RESULT_SUCCESS) { getContext()->logger.error("Failed to allocate {} bytes host " diff --git a/source/loader/layers/sanitizer/msan/msan_ddi.cpp b/source/loader/layers/sanitizer/msan/msan_ddi.cpp index 2dfeadc358..26779244fb 100644 --- a/source/loader/layers/sanitizer/msan/msan_ddi.cpp +++ b/source/loader/layers/sanitizer/msan/msan_ddi.cpp @@ -99,13 +99,50 @@ ur_result_t urUSMDeviceAlloc( ) { getContext()->logger.debug("==== urUSMDeviceAlloc"); - return getMsanInterceptor()->allocateMemory(hContext, hDevice, pUSMDesc, - pool, size, ppMem); + return getMsanInterceptor()->allocateMemory( + hContext, hDevice, pUSMDesc, pool, size, AllocType::DEVICE_USM, ppMem); +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urUSMHostAlloc +ur_result_t UR_APICALL urUSMHostAlloc( + ur_context_handle_t hContext, ///< [in] handle of the context object + const ur_usm_desc_t + *pUSMDesc, ///< [in][optional] USM memory allocation descriptor + ur_usm_pool_handle_t + pool, ///< [in][optional] Pointer to a pool created using urUSMPoolCreate + size_t + size, ///< [in] size in bytes of the USM memory object to be allocated + void **ppMem ///< [out] pointer to USM host memory object +) { + getContext()->logger.debug("==== urUSMHostAlloc"); + + return getMsanInterceptor()->allocateMemory( + hContext, nullptr, pUSMDesc, pool, size, AllocType::HOST_USM, ppMem); +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urUSMSharedAlloc +ur_result_t UR_APICALL urUSMSharedAlloc( + ur_context_handle_t hContext, ///< [in] handle of the context object + ur_device_handle_t hDevice, ///< [in] handle of the device object + const ur_usm_desc_t * + pUSMDesc, ///< [in][optional] Pointer to USM memory allocation descriptor. + ur_usm_pool_handle_t + pool, ///< [in][optional] Pointer to a pool created using urUSMPoolCreate + size_t + size, ///< [in] size in bytes of the USM memory object to be allocated + void **ppMem ///< [out] pointer to USM shared memory object +) { + getContext()->logger.debug("==== urUSMSharedAlloc"); + + return getMsanInterceptor()->allocateMemory( + hContext, hDevice, pUSMDesc, pool, size, AllocType::SHARED_USM, ppMem); } /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urUSMFree -__urdlllocal ur_result_t UR_APICALL urUSMFree( +ur_result_t UR_APICALL urUSMFree( ur_context_handle_t hContext, ///< [in] handle of the context object void *pMem ///< [in] pointer to USM memory object ) { @@ -1663,6 +1700,8 @@ ur_result_t urGetUSMProcAddrTable( ur_result_t result = UR_RESULT_SUCCESS; pDdiTable->pfnDeviceAlloc = ur_sanitizer_layer::msan::urUSMDeviceAlloc; + pDdiTable->pfnHostAlloc = ur_sanitizer_layer::msan::urUSMHostAlloc; + pDdiTable->pfnSharedAlloc = ur_sanitizer_layer::msan::urUSMSharedAlloc; pDdiTable->pfnFree = ur_sanitizer_layer::msan::urUSMFree; return result; diff --git a/source/loader/layers/sanitizer/msan/msan_interceptor.cpp b/source/loader/layers/sanitizer/msan/msan_interceptor.cpp index cdaa088297..d40457cb5d 100644 --- a/source/loader/layers/sanitizer/msan/msan_interceptor.cpp +++ b/source/loader/layers/sanitizer/msan/msan_interceptor.cpp @@ -46,18 +46,36 @@ ur_result_t MsanInterceptor::allocateMemory(ur_context_handle_t Context, ur_device_handle_t Device, const ur_usm_desc_t *Properties, ur_usm_pool_handle_t Pool, - size_t Size, void **ResultPtr) { + size_t Size, AllocType Type, + void **ResultPtr) { auto ContextInfo = getContextInfo(Context); - std::shared_ptr DeviceInfo = getDeviceInfo(Device); + std::shared_ptr DeviceInfo = + Device ? getDeviceInfo(Device) : nullptr; void *Allocated = nullptr; - UR_CALL(getContext()->urDdiTable.USM.pfnDeviceAlloc( - Context, Device, Properties, Pool, Size, &Allocated)); + if (Type == AllocType::DEVICE_USM) { + UR_CALL(getContext()->urDdiTable.USM.pfnDeviceAlloc( + Context, Device, Properties, Pool, Size, &Allocated)); + } else if (Type == AllocType::HOST_USM) { + UR_CALL(getContext()->urDdiTable.USM.pfnHostAlloc( + Context, Properties, Pool, Size, &Allocated)); + } else if (Type == AllocType::SHARED_USM) { + UR_CALL(getContext()->urDdiTable.USM.pfnSharedAlloc( + Context, Device, Properties, Pool, Size, &Allocated)); + } *ResultPtr = Allocated; + ContextInfo->MaxAllocatedSize = + std::max(ContextInfo->MaxAllocatedSize, Size); + + // For host/shared usm, we only record the alloc size. + if (Type != AllocType::DEVICE_USM) { + return UR_RESULT_SUCCESS; + } + auto AI = std::make_shared(MsanAllocInfo{(uptr)Allocated, Size, @@ -144,6 +162,12 @@ ur_result_t MsanInterceptor::registerProgram(ur_program_handle_t Program) { return Result; } + getContext()->logger.info("registerDeviceGlobals"); + Result = registerDeviceGlobals(Program); + if (Result != UR_RESULT_SUCCESS) { + return Result; + } + return Result; } @@ -212,6 +236,56 @@ ur_result_t MsanInterceptor::registerSpirKernels(ur_program_handle_t Program) { return UR_RESULT_SUCCESS; } +ur_result_t +MsanInterceptor::registerDeviceGlobals(ur_program_handle_t Program) { + std::vector Devices = GetDevices(Program); + assert(Devices.size() != 0 && "No devices in registerDeviceGlobals"); + auto Context = GetContext(Program); + auto ContextInfo = getContextInfo(Context); + auto ProgramInfo = getProgramInfo(Program); + assert(ProgramInfo != nullptr && "unregistered program!"); + + for (auto Device : Devices) { + ManagedQueue Queue(Context, Device); + + size_t MetadataSize; + void *MetadataPtr; + auto Result = + getContext()->urDdiTable.Program.pfnGetGlobalVariablePointer( + Device, Program, kSPIR_MsanDeviceGlobalMetadata, &MetadataSize, + &MetadataPtr); + if (Result != UR_RESULT_SUCCESS) { + getContext()->logger.info("No device globals"); + continue; + } + + const uint64_t NumOfDeviceGlobal = + MetadataSize / sizeof(DeviceGlobalInfo); + assert((MetadataSize % sizeof(DeviceGlobalInfo) == 0) && + "DeviceGlobal metadata size is not correct"); + std::vector GVInfos(NumOfDeviceGlobal); + Result = getContext()->urDdiTable.Enqueue.pfnUSMMemcpy( + Queue, true, &GVInfos[0], MetadataPtr, + sizeof(DeviceGlobalInfo) * NumOfDeviceGlobal, 0, nullptr, nullptr); + if (Result != UR_RESULT_SUCCESS) { + getContext()->logger.error("Device Global[{}] Read Failed: {}", + kSPIR_MsanDeviceGlobalMetadata, Result); + return Result; + } + + auto DeviceInfo = getMsanInterceptor()->getDeviceInfo(Device); + for (size_t i = 0; i < NumOfDeviceGlobal; i++) { + const auto &GVInfo = GVInfos[i]; + UR_CALL(DeviceInfo->Shadow->EnqueuePoisonShadow(Queue, GVInfo.Addr, + GVInfo.Size, 0)); + ContextInfo->MaxAllocatedSize = + std::max(ContextInfo->MaxAllocatedSize, GVInfo.Size); + } + } + + return UR_RESULT_SUCCESS; +} + ur_result_t MsanInterceptor::insertContext(ur_context_handle_t Context, std::shared_ptr &CI) { std::scoped_lock Guard(m_ContextMapMutex); @@ -379,10 +453,14 @@ ur_result_t MsanInterceptor::prepareLaunch( } // Set LaunchInfo + auto ContextInfo = getContextInfo(LaunchInfo.Context); LaunchInfo.Data->GlobalShadowOffset = DeviceInfo->Shadow->ShadowBegin; LaunchInfo.Data->GlobalShadowOffsetEnd = DeviceInfo->Shadow->ShadowEnd; LaunchInfo.Data->DeviceTy = DeviceInfo->Type; LaunchInfo.Data->Debug = getOptions().Debug ? 1 : 0; + UR_CALL(getContext()->urDdiTable.USM.pfnDeviceAlloc( + ContextInfo->Handle, DeviceInfo->Handle, nullptr, nullptr, + ContextInfo->MaxAllocatedSize, &LaunchInfo.Data->CleanShadow)); getContext()->logger.info( "launch_info {} (GlobalShadow={}, Device={}, Debug={})", @@ -465,6 +543,11 @@ ur_result_t USMLaunchInfo::initialize() { USMLaunchInfo::~USMLaunchInfo() { [[maybe_unused]] ur_result_t Result; if (Data) { + if (Data->CleanShadow) { + Result = getContext()->urDdiTable.USM.pfnFree(Context, + Data->CleanShadow); + assert(Result == UR_RESULT_SUCCESS); + } Result = getContext()->urDdiTable.USM.pfnFree(Context, (void *)Data); assert(Result == UR_RESULT_SUCCESS); } diff --git a/source/loader/layers/sanitizer/msan/msan_interceptor.hpp b/source/loader/layers/sanitizer/msan/msan_interceptor.hpp index 81c237380f..fea52741f3 100644 --- a/source/loader/layers/sanitizer/msan/msan_interceptor.hpp +++ b/source/loader/layers/sanitizer/msan/msan_interceptor.hpp @@ -121,6 +121,7 @@ struct ProgramInfo { struct ContextInfo { ur_context_handle_t Handle; + size_t MaxAllocatedSize = 1024; std::atomic RefCount = 1; std::vector DeviceList; @@ -159,6 +160,11 @@ struct USMLaunchInfo { ur_result_t initialize(); }; +struct DeviceGlobalInfo { + uptr Size; + uptr Addr; +}; + struct SpirKernelInfo { uptr KernelName; uptr Size; @@ -174,7 +180,7 @@ class MsanInterceptor { ur_device_handle_t Device, const ur_usm_desc_t *Properties, ur_usm_pool_handle_t Pool, size_t Size, - void **ResultPtr); + AllocType Type, void **ResultPtr); ur_result_t releaseMemory(ur_context_handle_t Context, void *Ptr); ur_result_t registerProgram(ur_program_handle_t Program); @@ -261,6 +267,7 @@ class MsanInterceptor { std::shared_ptr &DeviceInfo); ur_result_t registerSpirKernels(ur_program_handle_t Program); + ur_result_t registerDeviceGlobals(ur_program_handle_t Program); private: std::unordered_map> diff --git a/source/loader/layers/sanitizer/msan/msan_libdevice.hpp b/source/loader/layers/sanitizer/msan/msan_libdevice.hpp index 32e8f36552..0888c9dc75 100644 --- a/source/loader/layers/sanitizer/msan/msan_libdevice.hpp +++ b/source/loader/layers/sanitizer/msan/msan_libdevice.hpp @@ -53,7 +53,7 @@ struct MsanLaunchInfo { MsanErrorReport Report; - uint8_t CleanShadow[128] = {}; + void *CleanShadow = nullptr; }; // Based on the observation, only the last 24 bits of the address of the private diff --git a/source/loader/layers/sanitizer/msan/msan_shadow.cpp b/source/loader/layers/sanitizer/msan/msan_shadow.cpp index 2cdf8600d2..2573b4caa5 100644 --- a/source/loader/layers/sanitizer/msan/msan_shadow.cpp +++ b/source/loader/layers/sanitizer/msan/msan_shadow.cpp @@ -227,15 +227,10 @@ ur_result_t MsanShadowMemoryGPU::EnqueueMapShadow( VirtualMemMaps[MappedPtr].first = PhysicalMem; } - // We don't need to record virtual memory map for null pointer, - // since it doesn't have an alloc info. - if (Ptr == 0) { - continue; + auto AllocInfoItOp = getMsanInterceptor()->findAllocInfoByAddress(Ptr); + if (AllocInfoItOp) { + VirtualMemMaps[MappedPtr].second.insert((*AllocInfoItOp)->second); } - - auto AllocInfoIt = getMsanInterceptor()->findAllocInfoByAddress(Ptr); - assert(AllocInfoIt); - VirtualMemMaps[MappedPtr].second.insert((*AllocInfoIt)->second); } return UR_RESULT_SUCCESS;