diff --git a/backends-velox/src/test/scala/org/apache/gluten/execution/TestOperator.scala b/backends-velox/src/test/scala/org/apache/gluten/execution/TestOperator.scala index 469010832f6c5..cf270dc1ca48d 100644 --- a/backends-velox/src/test/scala/org/apache/gluten/execution/TestOperator.scala +++ b/backends-velox/src/test/scala/org/apache/gluten/execution/TestOperator.scala @@ -24,6 +24,7 @@ import org.apache.gluten.sql.shims.SparkShimLoader import org.apache.spark.SparkConf import org.apache.spark.sql.{AnalysisException, DataFrame, Row} import org.apache.spark.sql.execution._ +import org.apache.spark.sql.execution.window.WindowExec import org.apache.spark.sql.functions._ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.{DecimalType, IntegerType, StringType, StructField, StructType} @@ -215,7 +216,7 @@ class TestOperator extends VeloxWholeStageTransformerSuite { " (partition by l_suppkey order by l_orderkey" + " RANGE BETWEEN 1 PRECEDING AND CURRENT ROW), " + "min(l_comment) over" + - " (partition by l_suppkey order by l_discount" + + " (partition by l_suppkey order by l_linenumber" + " RANGE BETWEEN 1 PRECEDING AND CURRENT ROW) from lineitem ") { checkSparkOperatorMatch[WindowExecTransformer] } @@ -255,6 +256,14 @@ class TestOperator extends VeloxWholeStageTransformerSuite { checkSparkOperatorMatch[WindowExecTransformer] } + // DecimalType as order by column is not supported + runQueryAndCompare( + "select min(l_comment) over" + + " (partition by l_suppkey order by l_discount" + + " RANGE BETWEEN 1 PRECEDING AND CURRENT ROW) from lineitem ") { + checkSparkOperatorMatch[WindowExec] + } + runQueryAndCompare( "select ntile(4) over" + " (partition by l_suppkey order by l_orderkey) from lineitem ") { diff --git a/gluten-core/src/main/scala/org/apache/gluten/extension/columnar/rewrite/PullOutPreProject.scala b/gluten-core/src/main/scala/org/apache/gluten/extension/columnar/rewrite/PullOutPreProject.scala index 7a8758e9b0f85..73b8ab2607eb4 100644 --- a/gluten-core/src/main/scala/org/apache/gluten/extension/columnar/rewrite/PullOutPreProject.scala +++ b/gluten-core/src/main/scala/org/apache/gluten/extension/columnar/rewrite/PullOutPreProject.scala @@ -79,7 +79,10 @@ object PullOutPreProject extends RewriteSingleNode with PullOutProjectHelper { window.windowExpression.exists(_.find { case we: WindowExpression => we.windowSpec.frameSpecification match { - case swf: SpecifiedWindowFrame if needPreComputeRangeFrame(swf) => true + case swf: SpecifiedWindowFrame + if needPreComputeRangeFrame(swf) && supportPreComputeRangeFrame( + we.windowSpec.orderSpec) => + true case _ => false } case _ => false diff --git a/gluten-core/src/main/scala/org/apache/gluten/utils/PullOutProjectHelper.scala b/gluten-core/src/main/scala/org/apache/gluten/utils/PullOutProjectHelper.scala index 10638dba5c9ea..12055f9e97210 100644 --- a/gluten-core/src/main/scala/org/apache/gluten/utils/PullOutProjectHelper.scala +++ b/gluten-core/src/main/scala/org/apache/gluten/utils/PullOutProjectHelper.scala @@ -22,6 +22,7 @@ import org.apache.gluten.exception.{GlutenException, GlutenNotSupportException} import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.expressions.aggregate.{AggregateExpression, AggregateFunction} import org.apache.spark.sql.execution.aggregate._ +import org.apache.spark.sql.types.{ByteType, DateType, IntegerType, LongType, ShortType} import java.util.concurrent.atomic.AtomicInteger @@ -174,6 +175,16 @@ trait PullOutProjectHelper { (needPreComputeRangeFrameBoundary(swf.lower) || needPreComputeRangeFrameBoundary(swf.upper)) } + protected def supportPreComputeRangeFrame(sortOrders: Seq[SortOrder]): Boolean = { + sortOrders.forall { + _.dataType match { + case ByteType | ShortType | IntegerType | LongType | DateType => true + // Only integral type & date type are supported for sort key with Range Frame + case _ => false + } + } + } + protected def rewriteWindowExpression( we: WindowExpression, orderSpecs: Seq[SortOrder],