Skip to content

Commit

Permalink
SWDEV-458289/SWDEV-451964 - Limit gpu single allocation percentage for gfx940 only
Browse files Browse the repository at this point in the history

Change-Id: Iadcdadd734e7aeeb23742e426353defa972d3ad5
(cherry picked from commit dbac297)
  • Loading branch information
Sourabh Betigeri authored and yanyao-wang committed May 3, 2024
1 parent 8a82c14 commit 7b75645
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 4 deletions.
2 changes: 1 addition & 1 deletion rocclr/device/pal/paldevice.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -510,7 +510,7 @@ void NullDevice::fillDeviceInfo(const Pal::DeviceProperties& palProp,
info_.errorCorrectionSupport_ = false;

if (settings().apuSystem_) {
info_.hostUnifiedMemory_ = true;
info_.hostUnifiedMemory_ = 1;
}

info_.profilingTimerResolution_ = 1;
Expand Down
20 changes: 17 additions & 3 deletions rocclr/device/rocm/rocdevice.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1331,6 +1331,19 @@ bool Device::populateOCLDeviceConstants() {

info_.maxWorkItemDimensions_ = 3;

uint8_t memory_properties[8];
// Get the memory property from ROCr.
if (HSA_STATUS_SUCCESS != hsa_agent_get_info(bkendDevice_,
(hsa_agent_info_t) HSA_AMD_AGENT_INFO_MEMORY_PROPERTIES,
memory_properties)) {
LogError("HSA_AGENT_INFO_AMD_MEMORY_PROPERTIES query failed");
}

// Check if the device is an APU
if (hsa_flag_isset64(memory_properties, HSA_AMD_MEMORY_PROPERTY_AGENT_IS_APU)) {
info_.hostUnifiedMemory_ = 1;
}

if (settings().enableLocalMemory_ && gpuvm_segment_.handle != 0) {
size_t global_segment_size = 0;
if (HSA_STATUS_SUCCESS != hsa_amd_memory_pool_get_info(gpuvm_segment_,
Expand All @@ -1349,8 +1362,9 @@ bool Device::populateOCLDeviceConstants() {
GPU_SINGLE_ALLOC_PERCENT = 75;
}
}
// Limit gpu single allocation percentage on MI300
if ((isa().versionMajor() == 9) && (isa().versionMinor() == 4)) {
// Limit gpu single allocation percentage for gfx940
if ((isa().versionMajor() == 9) && (isa().versionMinor() == 4) &&
(isa().versionStepping() == 0) && (info_.hostUnifiedMemory_ == 1)) {
if (gpu_agents_.size() == 1 || p2p_agents_.size() == 0) {
if (flagIsDefault(GPU_SINGLE_ALLOC_PERCENT)) {
GPU_SINGLE_ALLOC_PERCENT = 60;
Expand Down Expand Up @@ -1432,7 +1446,7 @@ bool Device::populateOCLDeviceConstants() {

if (agent_profile_ == HSA_PROFILE_FULL) { // full-profile = participating in coherent memory,
// base-profile = NUMA based non-coherent memory
info_.hostUnifiedMemory_ = true;
info_.hostUnifiedMemory_ = 1;
}
info_.memBaseAddrAlign_ =
8 * (flagIsDefault(MEMOBJ_BASE_ADDR_ALIGN) ? sizeof(int64_t[16]) : MEMOBJ_BASE_ADDR_ALIGN);
Expand Down

0 comments on commit 7b75645

Please sign in to comment.