From c09e1a69343352a4818347a2e011c263431fe1f0 Mon Sep 17 00:00:00 2001
From: zhaokuo
Date: Mon, 16 Dec 2024 19:47:04 +0800
Subject: [PATCH] fix spill logic

---
 .../memtarget/RetryOnOomMemoryTarget.java | 23 +++++++++++++------
 1 file changed, 16 insertions(+), 7 deletions(-)

diff --git a/gluten-core/src/main/java/org/apache/gluten/memory/memtarget/RetryOnOomMemoryTarget.java b/gluten-core/src/main/java/org/apache/gluten/memory/memtarget/RetryOnOomMemoryTarget.java
index baa5b7324dcd..1a5388d0d187 100644
--- a/gluten-core/src/main/java/org/apache/gluten/memory/memtarget/RetryOnOomMemoryTarget.java
+++ b/gluten-core/src/main/java/org/apache/gluten/memory/memtarget/RetryOnOomMemoryTarget.java
@@ -36,21 +36,30 @@ public class RetryOnOomMemoryTarget implements TreeMemoryTarget {
   public long borrow(long size) {
     long granted = target.borrow(size);
     if (granted < size) {
-
-      LOGGER.info(
-          "Exceed Spark perTaskLimit with maxTaskSizeDynamic when "
-              + "require:{} got:{}, try spill all.",
-          size,
-          granted);
-      final long spilled = TreeMemoryTargets.spillTree(target, Long.MAX_VALUE);
+      LOGGER.info("Retrying spill require:{} got:{}", size, granted);
+      final long spilled = retryingSpill(Long.MAX_VALUE);
       final long remaining = size - granted;
       if (spilled >= remaining) {
         granted += target.borrow(remaining);
       }
+      LOGGER.info("Retrying spill spilled:{} final granted:{}", spilled, granted);
     }
     return granted;
   }
 
+  private long retryingSpill(long size) {
+    TreeMemoryTarget rootTarget = target;
+    while (true) {
+      try {
+        rootTarget = rootTarget.parent();
+      } catch (IllegalStateException e) {
+        // Reached the root node
+        break;
+      }
+    }
+    return TreeMemoryTargets.spillTree(rootTarget, size);
+  }
+
   @Override
   public long repay(long size) {
     return target.repay(size);