Skip to content

Commit

Permalink
add checks
Browse files (browse the repository at this point in the history)
  • Loading branch information
lgbo-ustc committed Aug 20, 2024
1 parent db7d4be commit 2cd649e
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -407,4 +407,11 @@ object CHBackendSettings extends BackendSettingsApi with Logging {
}
}

/**
 * Resolves the broadcast-join threshold from the active session's `SQLConf`.
 *
 * The value stored under `SQLConf.ADAPTIVE_AUTO_BROADCASTJOIN_THRESHOLD` is used when that
 * entry yields one; otherwise the conf's `autoBroadcastJoinThreshold` is returned instead.
 *
 * NOTE(review): the unit is presumably bytes, and a negative value presumably means
 * "broadcast disabled" -- confirm against the Spark configuration docs before relying on it.
 *
 * @return
 *   the threshold taken from the adaptive entry if present, else the non-adaptive one.
 */
def getBroadcastThreshold: Long = {
  val sqlConf = SQLConf.get
  sqlConf.getConf(SQLConf.ADAPTIVE_AUTO_BROADCASTJOIN_THRESHOLD) match {
    case Some(adaptiveThreshold) => adaptiveThreshold
    // The fallback is read only on this branch, mirroring a lazily evaluated default.
    case None => sqlConf.autoBroadcastJoinThreshold
  }
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -539,9 +539,18 @@ class CHSparkPlanExecApi extends SparkPlanExecApi {
CHExecUtil.buildSideRDD(dataSize, newChild).collect

val batches = countsAndBytes.map(_._2)
val totalBatchesBytes = batches.map(_.length).sum
if (
totalBatchesBytes < 0 || totalBatchesBytes.toLong > CHBackendSettings.getBroadcastThreshold
) {
throw new GlutenException(
s"Cannot broadcast the table (${totalBatchesBytes}) that is larger than threshold:" +
s" ${CHBackendSettings.getBroadcastThreshold}. Ensure the shuffle written" +
s"bytes is collected properly.")
}
val rawSize = dataSize.value
if (rawSize >= BroadcastExchangeExec.MAX_BROADCAST_TABLE_BYTES) {
throw new SparkException(
throw new GlutenException(
s"Cannot broadcast the table that is larger than 8GB: ${rawSize >> 30} GB")
}
val rowCount = countsAndBytes.map(_._1).sum
Expand Down

0 comments on commit 2cd649e

Please sign in to comment.