Skip to content

Commit

Permalink
Merge pull request #2064 from igchor/memory_buffer
Browse files Browse the repository at this point in the history
[L0 v2] add initial memory buffer support
  • Loading branch information
pbalcer authored Sep 9, 2024
2 parents 88c3287 + fc3f254 commit b8b76f4
Show file tree
Hide file tree
Showing 24 changed files with 999 additions and 1,289 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/cmake.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ jobs:
compiler: [{c: gcc, cxx: g++}]
libbacktrace: ['-DVAL_USE_LIBBACKTRACE_BACKTRACE=OFF']
pool_tracking: ['-DUMF_ENABLE_POOL_TRACKING=ON', '-DUMF_ENABLE_POOL_TRACKING=OFF']
latency_tracking: ['-DUMF_ENABLE_LATENCY_TRACKING=OFF']
latency_tracking: ['-DUR_ENABLE_LATENCY_HISTOGRAM=OFF']
include:
- os: 'ubuntu-22.04'
build_type: Release
Expand All @@ -40,7 +40,7 @@ jobs:
- os: 'ubuntu-22.04'
build_type: Release
compiler: {c: clang, cxx: clang++}
latency_tracking: '-DUMF_ENABLE_LATENCY_TRACKING=ON'
latency_tracking: '-DUR_ENABLE_LATENCY_HISTOGRAM=ON'
runs-on: ${{ (matrix.os == 'ubuntu-22.04' && github.repository_owner == 'oneapi-src') && 'intel-ubuntu-22.04' || matrix.os }}

steps:
Expand Down
6 changes: 6 additions & 0 deletions source/adapters/level_zero/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,7 @@ if(UR_BUILD_ADAPTER_L0)
${CMAKE_CURRENT_SOURCE_DIR}/queue.hpp
${CMAKE_CURRENT_SOURCE_DIR}/sampler.hpp
${CMAKE_CURRENT_SOURCE_DIR}/helpers/kernel_helpers.hpp
${CMAKE_CURRENT_SOURCE_DIR}/helpers/memory_helpers.hpp
${CMAKE_CURRENT_SOURCE_DIR}/ur_level_zero.cpp
${CMAKE_CURRENT_SOURCE_DIR}/common.cpp
${CMAKE_CURRENT_SOURCE_DIR}/context.cpp
Expand All @@ -136,6 +137,7 @@ if(UR_BUILD_ADAPTER_L0)
${CMAKE_CURRENT_SOURCE_DIR}/sampler.cpp
${CMAKE_CURRENT_SOURCE_DIR}/image.cpp
${CMAKE_CURRENT_SOURCE_DIR}/helpers/kernel_helpers.cpp
${CMAKE_CURRENT_SOURCE_DIR}/helpers/memory_helpers.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../ur/ur.cpp
)

Expand Down Expand Up @@ -199,13 +201,15 @@ if(UR_BUILD_ADAPTER_L0_V2)
${CMAKE_CURRENT_SOURCE_DIR}/platform.hpp
${CMAKE_CURRENT_SOURCE_DIR}/program.hpp
${CMAKE_CURRENT_SOURCE_DIR}/helpers/kernel_helpers.hpp
${CMAKE_CURRENT_SOURCE_DIR}/helpers/memory_helpers.hpp
${CMAKE_CURRENT_SOURCE_DIR}/adapter.cpp
${CMAKE_CURRENT_SOURCE_DIR}/common.cpp
${CMAKE_CURRENT_SOURCE_DIR}/device.cpp
${CMAKE_CURRENT_SOURCE_DIR}/ur_interface_loader.cpp
${CMAKE_CURRENT_SOURCE_DIR}/platform.cpp
${CMAKE_CURRENT_SOURCE_DIR}/program.cpp
${CMAKE_CURRENT_SOURCE_DIR}/helpers/kernel_helpers.cpp
${CMAKE_CURRENT_SOURCE_DIR}/helpers/memory_helpers.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../ur/ur.cpp
# v2-only sources
${CMAKE_CURRENT_SOURCE_DIR}/v2/command_list_cache.hpp
Expand All @@ -217,6 +221,7 @@ if(UR_BUILD_ADAPTER_L0_V2)
${CMAKE_CURRENT_SOURCE_DIR}/v2/event_provider.hpp
${CMAKE_CURRENT_SOURCE_DIR}/v2/event.hpp
${CMAKE_CURRENT_SOURCE_DIR}/v2/kernel.hpp
${CMAKE_CURRENT_SOURCE_DIR}/v2/memory.hpp
${CMAKE_CURRENT_SOURCE_DIR}/v2/queue_api.hpp
${CMAKE_CURRENT_SOURCE_DIR}/v2/queue_immediate_in_order.hpp
${CMAKE_CURRENT_SOURCE_DIR}/v2/usm.hpp
Expand All @@ -229,6 +234,7 @@ if(UR_BUILD_ADAPTER_L0_V2)
${CMAKE_CURRENT_SOURCE_DIR}/v2/event_provider_normal.cpp
${CMAKE_CURRENT_SOURCE_DIR}/v2/event.cpp
${CMAKE_CURRENT_SOURCE_DIR}/v2/kernel.cpp
${CMAKE_CURRENT_SOURCE_DIR}/v2/memory.cpp
${CMAKE_CURRENT_SOURCE_DIR}/v2/queue_api.cpp
${CMAKE_CURRENT_SOURCE_DIR}/v2/queue_create.cpp
${CMAKE_CURRENT_SOURCE_DIR}/v2/queue_immediate_in_order.cpp
Expand Down
33 changes: 33 additions & 0 deletions source/adapters/level_zero/helpers/memory_helpers.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
//===--------- memory_helpers.cpp - Level Zero Adapter -------------------===//
//
// Copyright (C) 2024 Intel Corporation
//
// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM
// Exceptions. See LICENSE.TXT
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "memory_helpers.hpp"
#include "../common.hpp"

// Returns the `type` field of the allocation properties that
// zeMemGetAllocProperties reports for `ptr` within `hContext`.
// NOTE(review): the query is wrapped in ZE2UR_CALL_THROWS; presumably a
// failing query is reported by throwing instead of through the return value —
// confirm against the macro's definition.
ze_memory_type_t getMemoryType(ze_context_handle_t hContext, void *ptr) {
// TODO: use UMF once
// https://github.com/oneapi-src/unified-memory-framework/issues/687 is
// implemented
ZeStruct<ze_memory_allocation_properties_t> zeMemoryAllocationProperties;
// The last argument (device-handle output) is passed as nullptr because only
// the allocation type is read below.
ZE2UR_CALL_THROWS(zeMemGetAllocProperties,
(hContext, ptr, &zeMemoryAllocationProperties, nullptr));
return zeMemoryAllocationProperties.type;
}

// Imports `ptr` through ZeUSMImport when all of the following hold:
//   - the USM import feature is enabled (ZeUSMImport.Enabled),
//   - `ptr` is not null,
//   - getMemoryType() reports ZE_MEMORY_TYPE_UNKNOWN for `ptr` in `hContext`.
//
// hTranslatedDriver - driver handle forwarded to doZeUSMImport.
// hContext          - context used for the memory-type query.
// ptr, size         - host range forwarded to doZeUSMImport.
//
// Returns true when doZeUSMImport was called, false when the import was
// skipped. The memory-type query is only issued once the two cheap checks
// have passed.
bool maybeImportUSM(ze_driver_handle_t hTranslatedDriver,
                    ze_context_handle_t hContext, void *ptr, size_t size) {
  if (!ZeUSMImport.Enabled || ptr == nullptr) {
    return false;
  }

  // Any type other than UNKNOWN is left alone.
  if (getMemoryType(hContext, ptr) != ZE_MEMORY_TYPE_UNKNOWN) {
    return false;
  }

  // Promote the host ptr to USM host memory
  ZeUSMImport.doZeUSMImport(hTranslatedDriver, ptr, size);
  return true;
}
23 changes: 23 additions & 0 deletions source/adapters/level_zero/helpers/memory_helpers.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
//===--------- memory_helpers.hpp - Level Zero Adapter -------------------===//
//
// Copyright (C) 2024 Intel Corporation
//
// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM
// Exceptions. See LICENSE.TXT
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#pragma once

#include <ur_api.h>
#include <ze_api.h>

// If USM Import feature is enabled and hostptr is supplied,
// import the hostptr if not already imported into USM.
// Data transfer rate is maximized when both source and destination
// are USM pointers. Promotion of the host pointer to USM thus
// optimizes data transfer performance.
//
// `hTranslatedDriver`, `ptr` and `size` are forwarded to the import call;
// `hContext` is used to query the memory type of `ptr`.
// Returns true when the pointer was imported by this call and false when the
// import was skipped.
bool maybeImportUSM(ze_driver_handle_t hTranslatedDriver,
ze_context_handle_t hContext, void *ptr, size_t size);

// Returns the memory type that Level Zero reports for the allocation `ptr`
// belongs to within `hContext`.
ze_memory_type_t getMemoryType(ze_context_handle_t hContext, void *ptr);
28 changes: 5 additions & 23 deletions source/adapters/level_zero/memory.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@

#include "context.hpp"
#include "event.hpp"
#include "helpers/memory_helpers.hpp"
#include "image.hpp"
#include "logger/ur_logger.hpp"
#include "queue.hpp"
Expand Down Expand Up @@ -1599,30 +1600,11 @@ ur_result_t urMemBufferCreate(
Host = Properties->pHost;
}

// If USM Import feature is enabled and hostptr is supplied,
// import the hostptr if not already imported into USM.
// Data transfer rate is maximized when both source and destination
// are USM pointers. Promotion of the host pointer to USM thus
// optimizes data transfer performance.
bool HostPtrImported = false;
if (ZeUSMImport.Enabled && Host != nullptr &&
(Flags & UR_MEM_FLAG_USE_HOST_POINTER) != 0) {
// Query memory type of the host pointer
ze_device_handle_t ZeDeviceHandle;
ZeStruct<ze_memory_allocation_properties_t> ZeMemoryAllocationProperties;
ZE2UR_CALL(zeMemGetAllocProperties,
(Context->ZeContext, Host, &ZeMemoryAllocationProperties,
&ZeDeviceHandle));

// If not shared of any type, we can import the ptr
if (ZeMemoryAllocationProperties.type == ZE_MEMORY_TYPE_UNKNOWN) {
// Promote the host ptr to USM host memory
ze_driver_handle_t driverHandle =
Context->getPlatform()->ZeDriverHandleExpTranslated;
ZeUSMImport.doZeUSMImport(driverHandle, Host, Size);
HostPtrImported = true;
}
}
if (Flags & UR_MEM_FLAG_USE_HOST_POINTER)
HostPtrImported =
maybeImportUSM(Context->getPlatform()->ZeDriverHandleExpTranslated,
Context->ZeContext, Host, Size);

_ur_buffer *Buffer = nullptr;
auto HostPtrOrNull = (Flags & UR_MEM_FLAG_USE_HOST_POINTER)
Expand Down
89 changes: 0 additions & 89 deletions source/adapters/level_zero/v2/api.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -49,46 +49,13 @@ ur_result_t urMemImageCreate(ur_context_handle_t hContext, ur_mem_flags_t flags,
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}

// Placeholder for buffer creation: no buffer is created and phBuffer is not
// written. Logs an error naming this function (via __FUNCTION__) and returns
// UR_RESULT_ERROR_UNSUPPORTED_FEATURE. All parameters are unused.
ur_result_t urMemBufferCreate(ur_context_handle_t hContext,
ur_mem_flags_t flags, size_t size,
const ur_buffer_properties_t *pProperties,
ur_mem_handle_t *phBuffer) {
logger::error("{} function not implemented!", __FUNCTION__);
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}

// Placeholder for retaining a memory object: hMem is not touched. Logs an
// error naming this function (via __FUNCTION__) and returns
// UR_RESULT_ERROR_UNSUPPORTED_FEATURE.
ur_result_t urMemRetain(ur_mem_handle_t hMem) {
logger::error("{} function not implemented!", __FUNCTION__);
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}

// Placeholder for releasing a memory object: hMem is not touched. Logs an
// error naming this function (via __FUNCTION__) and returns
// UR_RESULT_ERROR_UNSUPPORTED_FEATURE.
ur_result_t urMemRelease(ur_mem_handle_t hMem) {
logger::error("{} function not implemented!", __FUNCTION__);
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}

// Placeholder for buffer partitioning: no sub-buffer is produced and phMem is
// not written. Logs an error naming this function (via __FUNCTION__) and
// returns UR_RESULT_ERROR_UNSUPPORTED_FEATURE. All parameters are unused.
ur_result_t urMemBufferPartition(ur_mem_handle_t hBuffer, ur_mem_flags_t flags,
ur_buffer_create_type_t bufferCreateType,
const ur_buffer_region_t *pRegion,
ur_mem_handle_t *phMem) {
logger::error("{} function not implemented!", __FUNCTION__);
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}

// Placeholder for the native-handle query: phNativeMem is not written. Logs
// an error naming this function (via __FUNCTION__) and returns
// UR_RESULT_ERROR_UNSUPPORTED_FEATURE. All parameters are unused.
ur_result_t urMemGetNativeHandle(ur_mem_handle_t hMem,
ur_device_handle_t hDevice,
ur_native_handle_t *phNativeMem) {
logger::error("{} function not implemented!", __FUNCTION__);
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}

// Placeholder for wrapping a native handle in a buffer: phMem is not written.
// Logs an error naming this function (via __FUNCTION__) and returns
// UR_RESULT_ERROR_UNSUPPORTED_FEATURE. All parameters are unused.
ur_result_t urMemBufferCreateWithNativeHandle(
ur_native_handle_t hNativeMem, ur_context_handle_t hContext,
const ur_mem_native_properties_t *pProperties, ur_mem_handle_t *phMem) {
logger::error("{} function not implemented!", __FUNCTION__);
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}

ur_result_t urMemImageCreateWithNativeHandle(
ur_native_handle_t hNativeMem, ur_context_handle_t hContext,
const ur_image_format_t *pImageFormat, const ur_image_desc_t *pImageDesc,
Expand Down Expand Up @@ -217,48 +184,13 @@ ur_result_t urPhysicalMemRelease(ur_physical_mem_handle_t hPhysicalMem) {
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}

// Placeholder for setting a local-memory kernel argument: the kernel is not
// modified. Logs an error naming this function (via __FUNCTION__) and returns
// UR_RESULT_ERROR_UNSUPPORTED_FEATURE. All parameters are unused.
ur_result_t
urKernelSetArgLocal(ur_kernel_handle_t hKernel, uint32_t argIndex,
size_t argSize,
const ur_kernel_arg_local_properties_t *pProperties) {
logger::error("{} function not implemented!", __FUNCTION__);
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}

// Placeholder for the kernel info query: neither pPropValue nor pPropSizeRet
// is written. Logs an error naming this function (via __FUNCTION__) and
// returns UR_RESULT_ERROR_UNSUPPORTED_FEATURE. All parameters are unused.
ur_result_t urKernelGetInfo(ur_kernel_handle_t hKernel,
ur_kernel_info_t propName, size_t propSize,
void *pPropValue, size_t *pPropSizeRet) {
logger::error("{} function not implemented!", __FUNCTION__);
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}

// Placeholder for the kernel group info query: neither pPropValue nor
// pPropSizeRet is written. Logs an error naming this function (via
// __FUNCTION__) and returns UR_RESULT_ERROR_UNSUPPORTED_FEATURE. All
// parameters are unused.
ur_result_t urKernelGetGroupInfo(ur_kernel_handle_t hKernel,
ur_device_handle_t hDevice,
ur_kernel_group_info_t propName,
size_t propSize, void *pPropValue,
size_t *pPropSizeRet) {
logger::error("{} function not implemented!", __FUNCTION__);
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}

// Placeholder for the kernel sub-group info query: neither pPropValue nor
// pPropSizeRet is written. Logs an error naming this function (via
// __FUNCTION__) and returns UR_RESULT_ERROR_UNSUPPORTED_FEATURE. All
// parameters are unused.
ur_result_t urKernelGetSubGroupInfo(ur_kernel_handle_t hKernel,
ur_device_handle_t hDevice,
ur_kernel_sub_group_info_t propName,
size_t propSize, void *pPropValue,
size_t *pPropSizeRet) {
logger::error("{} function not implemented!", __FUNCTION__);
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}

// Placeholder for setting kernel execution info: the kernel is not modified.
// Logs an error naming this function (via __FUNCTION__) and returns
// UR_RESULT_ERROR_UNSUPPORTED_FEATURE. All parameters are unused.
ur_result_t
urKernelSetExecInfo(ur_kernel_handle_t hKernel, ur_kernel_exec_info_t propName,
size_t propSize,
const ur_kernel_exec_info_properties_t *pProperties,
const void *pPropValue) {
logger::error("{} function not implemented!", __FUNCTION__);
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}

ur_result_t
urKernelSetArgSampler(ur_kernel_handle_t hKernel, uint32_t argIndex,
const ur_kernel_arg_sampler_properties_t *pProperties,
Expand All @@ -267,14 +199,6 @@ urKernelSetArgSampler(ur_kernel_handle_t hKernel, uint32_t argIndex,
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}

// Placeholder for binding a memory object as a kernel argument: the kernel is
// not modified. Logs an error naming this function (via __FUNCTION__) and
// returns UR_RESULT_ERROR_UNSUPPORTED_FEATURE. All parameters are unused.
ur_result_t
urKernelSetArgMemObj(ur_kernel_handle_t hKernel, uint32_t argIndex,
const ur_kernel_arg_mem_obj_properties_t *pProperties,
ur_mem_handle_t hArgValue) {
logger::error("{} function not implemented!", __FUNCTION__);
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}

ur_result_t urKernelSetSpecializationConstants(
ur_kernel_handle_t hKernel, uint32_t count,
const ur_specialization_constant_info_t *pSpecConstants) {
Expand Down Expand Up @@ -308,13 +232,6 @@ ur_result_t urKernelGetSuggestedLocalWorkSize(ur_kernel_handle_t hKernel,
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}

// Placeholder for the event info query: neither pPropValue nor pPropSizeRet
// is written. Logs an error naming this function (via __FUNCTION__) and
// returns UR_RESULT_ERROR_UNSUPPORTED_FEATURE. All parameters are unused.
ur_result_t urEventGetInfo(ur_event_handle_t hEvent, ur_event_info_t propName,
size_t propSize, void *pPropValue,
size_t *pPropSizeRet) {
logger::error("{} function not implemented!", __FUNCTION__);
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}

ur_result_t urEventGetProfilingInfo(ur_event_handle_t hEvent,
ur_profiling_info_t propName,
size_t propSize, void *pPropValue,
Expand All @@ -323,12 +240,6 @@ ur_result_t urEventGetProfilingInfo(ur_event_handle_t hEvent,
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}

// Placeholder for waiting on events: no event is waited on. Logs an error
// naming this function (via __FUNCTION__) and returns
// UR_RESULT_ERROR_UNSUPPORTED_FEATURE. All parameters are unused.
ur_result_t urEventWait(uint32_t numEvents,
const ur_event_handle_t *phEventWaitList) {
logger::error("{} function not implemented!", __FUNCTION__);
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}

ur_result_t urEventGetNativeHandle(ur_event_handle_t hEvent,
ur_native_handle_t *phNativeEvent) {
logger::error("{} function not implemented!", __FUNCTION__);
Expand Down
6 changes: 6 additions & 0 deletions source/adapters/level_zero/v2/context.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,12 @@ ur_result_t urContextGetInfo(ur_context_handle_t hContext,
return ReturnValue(uint32_t(hContext->getDevices().size()));
case UR_CONTEXT_INFO_REFERENCE_COUNT:
return ReturnValue(uint32_t{hContext->RefCount.load()});
case UR_CONTEXT_INFO_USM_MEMCPY2D_SUPPORT:
// TODO: this is currently not implemented
return ReturnValue(uint8_t{false});
case UR_CONTEXT_INFO_USM_FILL2D_SUPPORT:
// 2D USM fill is not supported.
return ReturnValue(uint8_t{false});
default:
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
Expand Down
37 changes: 37 additions & 0 deletions source/adapters/level_zero/v2/event.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,4 +51,41 @@ ur_result_t urEventRetain(ur_event_handle_t hEvent) { return hEvent->retain(); }
ur_result_t urEventRelease(ur_event_handle_t hEvent) {
return hEvent->release();
}

// Synchronizes the host with each of the first `numEvents` events in
// `phEventWaitList`, in list order, by calling zeEventHostSynchronize on the
// event's Level Zero handle.
//
// numEvents       - number of entries to wait on; zero results in no calls.
// phEventWaitList - events to wait on; every entry is dereferenced.
//
// Returns UR_RESULT_SUCCESS after the loop finishes.
// NOTE(review): ZE2UR_CALL presumably returns early with a translated error
// when the Level Zero call fails — confirm against the macro's definition.
ur_result_t urEventWait(uint32_t numEvents,
                        const ur_event_handle_t *phEventWaitList) {
  uint32_t i = 0;
  while (i != numEvents) {
    // UINT64_MAX is passed as the timeout argument of the synchronize call.
    ZE2UR_CALL(zeEventHostSynchronize,
               (phEventWaitList[i]->getZeEvent(), UINT64_MAX));
    ++i;
  }
  return UR_RESULT_SUCCESS;
}

// Answers an info query about `hEvent`.
//
// hEvent            - event being queried.
// propName          - property to report.
// propValueSize     - passed to UrReturnHelper together with the two output
// pPropValue          pointers; the helper performs the actual write-out of
// pPropValueSizeRet   the value and/or its size.
//
// Supported properties:
//   UR_EVENT_INFO_COMMAND_EXECUTION_STATUS
//       UR_EVENT_STATUS_SUBMITTED while zeEventQueryStatus reports
//       ZE_RESULT_NOT_READY, UR_EVENT_STATUS_COMPLETE otherwise.
//   UR_EVENT_INFO_REFERENCE_COUNT
//       the current value of hEvent->RefCount.
//
// Any other property is logged and rejected with
// UR_RESULT_ERROR_INVALID_VALUE. Every path of the switch returns, so no
// trailing return statement is needed.
ur_result_t urEventGetInfo(ur_event_handle_t hEvent, ur_event_info_t propName,
                           size_t propValueSize, void *pPropValue,
                           size_t *pPropValueSizeRet) {
  UrReturnHelper returnValue(propValueSize, pPropValue, pPropValueSizeRet);

  switch (propName) {
  case UR_EVENT_INFO_COMMAND_EXECUTION_STATUS: {
    auto zeStatus = ZE_CALL_NOCHECK(zeEventQueryStatus, (hEvent->getZeEvent()));

    // NOTE(review): every result other than ZE_RESULT_NOT_READY — including
    // error codes — is reported as COMPLETE; confirm this is intended.
    if (zeStatus == ZE_RESULT_NOT_READY) {
      return returnValue(UR_EVENT_STATUS_SUBMITTED);
    }
    return returnValue(UR_EVENT_STATUS_COMPLETE);
  }
  case UR_EVENT_INFO_REFERENCE_COUNT:
    return returnValue(hEvent->RefCount.load());
  default:
    // The message previously printed the "ParamName=" prefix twice.
    logger::error("Unsupported ParamName in urEventGetInfo: ParamName={}(0x{})",
                  propName, logger::toHex(propName));
    return UR_RESULT_ERROR_INVALID_VALUE;
  }
}
} // namespace ur::level_zero
2 changes: 1 addition & 1 deletion source/adapters/level_zero/v2/event_provider_counter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ event_allocation provider_counter::allocate() {
if (freelist.empty()) {
ZeStruct<ze_event_desc_t> desc;
desc.index = 0;
desc.signal = 0;
desc.signal = ZE_EVENT_SCOPE_FLAG_HOST;
desc.wait = 0;
ze_event_handle_t handle;

Expand Down
2 changes: 1 addition & 1 deletion source/adapters/level_zero/v2/event_provider_normal.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ provider_pool::provider_pool(ur_context_handle_t context,
for (int i = 0; i < EVENTS_BURST; ++i) {
ZeStruct<ze_event_desc_t> desc;
desc.index = i;
desc.signal = 0;
desc.signal = ZE_EVENT_SCOPE_FLAG_HOST;
desc.wait = 0;
ZE2UR_CALL_THROWS(zeEventCreate, (pool.get(), &desc, freelist[i].ptr()));
}
Expand Down
Loading

0 comments on commit b8b76f4

Please sign in to comment.