From efd6f31fb44ea21846c0292d5ea31a3e05aa3af3 Mon Sep 17 00:00:00 2001
From: Joey
Date: Mon, 27 May 2024 20:44:36 +0800
Subject: [PATCH] [VL] Support DecimalType for approx_count_distinct (#5868)

[VL] Support DecimalType for approx_count_distinct.
---
 .../gluten/extension/HLLRewriteRule.scala     |  1 +
 .../VeloxAggregateFunctionsSuite.scala        | 20 +++++++++++++++++++
 2 files changed, 21 insertions(+)

diff --git a/backends-velox/src/main/scala/org/apache/gluten/extension/HLLRewriteRule.scala b/backends-velox/src/main/scala/org/apache/gluten/extension/HLLRewriteRule.scala
index cb1e626a1ea6..03819fc102ab 100644
--- a/backends-velox/src/main/scala/org/apache/gluten/extension/HLLRewriteRule.scala
+++ b/backends-velox/src/main/scala/org/apache/gluten/extension/HLLRewriteRule.scala
@@ -73,6 +73,7 @@ case class HLLRewriteRule(spark: SparkSession) extends Rule[LogicalPlan] {
       case LongType => true
       case ShortType => true
       case StringType => true
+      case _: DecimalType => true
       case _ => false
     }
   }
diff --git a/backends-velox/src/test/scala/org/apache/gluten/execution/VeloxAggregateFunctionsSuite.scala b/backends-velox/src/test/scala/org/apache/gluten/execution/VeloxAggregateFunctionsSuite.scala
index faa361edf5aa..ffed6373123e 100644
--- a/backends-velox/src/test/scala/org/apache/gluten/execution/VeloxAggregateFunctionsSuite.scala
+++ b/backends-velox/src/test/scala/org/apache/gluten/execution/VeloxAggregateFunctionsSuite.scala
@@ -571,6 +571,26 @@ abstract class VeloxAggregateFunctionsSuite extends VeloxWholeStageTransformerSu
     }
   }
 
+  test("approx_count_distinct decimal") {
+    // The data type of l_discount is decimal.
+    runQueryAndCompare("""
+                         |select approx_count_distinct(l_discount) from lineitem;
+                         |""".stripMargin) {
+      checkGlutenOperatorMatch[HashAggregateExecTransformer]
+    }
+    runQueryAndCompare(
+      "select approx_count_distinct(l_discount), count(distinct l_orderkey) from lineitem") {
+      df =>
+        {
+          assert(
+            getExecutedPlan(df).count(
+              plan => {
+                plan.isInstanceOf[HashAggregateExecTransformer]
+              }) == 0)
+        }
+    }
+  }
+
   test("max_by") {
     runQueryAndCompare(s"""
                           |select max_by(l_linenumber, l_comment) from lineitem;