diff --git a/backends-clickhouse/src/main/scala/org/apache/gluten/backendsapi/clickhouse/CHSparkPlanExecApi.scala b/backends-clickhouse/src/main/scala/org/apache/gluten/backendsapi/clickhouse/CHSparkPlanExecApi.scala
index 659c3cbcade2..903523791a1b 100644
--- a/backends-clickhouse/src/main/scala/org/apache/gluten/backendsapi/clickhouse/CHSparkPlanExecApi.scala
+++ b/backends-clickhouse/src/main/scala/org/apache/gluten/backendsapi/clickhouse/CHSparkPlanExecApi.scala
@@ -26,7 +26,7 @@ import org.apache.gluten.extension.columnar.AddFallbackTagRule
 import org.apache.gluten.extension.columnar.MiscColumnarRules.TransformPreOverrides
 import org.apache.gluten.sql.shims.SparkShimLoader
 import org.apache.gluten.substrait.expression.{ExpressionBuilder, ExpressionNode, WindowFunctionNode}
-import org.apache.gluten.utils.{CHAggUtil, CHJoinValidateUtil, UnknownJoinStrategy}
+import org.apache.gluten.utils.{CHJoinValidateUtil, UnknownJoinStrategy}
 import org.apache.gluten.vectorized.CHColumnarBatchSerializer
 
 import org.apache.spark.ShuffleDependency
@@ -160,7 +160,7 @@ class CHSparkPlanExecApi extends SparkPlanExecApi with Logging {
       child: SparkPlan): HashAggregateExecBaseTransformer =
     CHHashAggregateExecTransformer(
       requiredChildDistributionExpressions,
-      CHAggUtil.distinctIgnoreQualifier(groupingExpressions),
+      groupingExpressions.distinct,
       aggregateExpressions,
       aggregateAttributes,
       initialInputBufferOffset,
diff --git a/backends-clickhouse/src/main/scala/org/apache/gluten/execution/CHHashAggregateExecTransformer.scala b/backends-clickhouse/src/main/scala/org/apache/gluten/execution/CHHashAggregateExecTransformer.scala
index b8ddedd3005a..d641c05cd62e 100644
--- a/backends-clickhouse/src/main/scala/org/apache/gluten/execution/CHHashAggregateExecTransformer.scala
+++ b/backends-clickhouse/src/main/scala/org/apache/gluten/execution/CHHashAggregateExecTransformer.scala
@@ -26,7 +26,6 @@ import org.apache.gluten.substrait.{AggregationParams, SubstraitContext}
 import org.apache.gluten.substrait.expression.{AggregateFunctionNode, ExpressionBuilder, ExpressionNode}
 import org.apache.gluten.substrait.extensions.{AdvancedExtensionNode, ExtensionBuilder}
 import org.apache.gluten.substrait.rel.{RelBuilder, RelNode}
-import org.apache.gluten.utils.CHAggUtil
 
 import org.apache.spark.sql.catalyst.expressions._
 import org.apache.spark.sql.catalyst.expressions.aggregate._
@@ -430,15 +429,6 @@ case class CHHashAggregateExecPullOutHelper(
     aggregateAttr.toList
   }
 
-  override def allAggregateResultAttributes(
-      groupingExpressions: Seq[NamedExpression]): List[Attribute] = {
-    if (aggregateExpressions.nonEmpty) {
-      super.allAggregateResultAttributes(groupingExpressions)
-    } else {
-      super.allAggregateResultAttributes(CHAggUtil.distinctIgnoreQualifier(groupingExpressions))
-    }
-  }
-
   protected def getAttrForAggregateExpr(
       exp: AggregateExpression,
       aggregateAttributeList: Seq[Attribute],
diff --git a/backends-clickhouse/src/main/scala/org/apache/gluten/utils/CHAggUtil.scala b/backends-clickhouse/src/main/scala/org/apache/gluten/utils/CHAggUtil.scala
deleted file mode 100644
index ccab7c295013..000000000000
--- a/backends-clickhouse/src/main/scala/org/apache/gluten/utils/CHAggUtil.scala
+++ /dev/null
@@ -1,46 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.gluten.utils
-
-import org.apache.spark.internal.Logging
-import org.apache.spark.sql.catalyst.expressions.NamedExpression
-
-import scala.util.control.Breaks.{break, breakable}
-
-object CHAggUtil extends Logging {
-  def distinctIgnoreQualifier(expressions: Seq[NamedExpression]): Seq[NamedExpression] = {
-    var dist = List[NamedExpression]()
-    for (i <- expressions.indices) {
-      var k = -1
-      breakable {
-        for (j <- 0 to i - 1)
-          if (
-            j != i &&
-            expressions(i).name == expressions(j).name &&
-            expressions(i).exprId == expressions(j).exprId &&
-            expressions(i).dataType == expressions(j).dataType &&
-            expressions(i).nullable == expressions(j).nullable
-          ) {
-            k = j
-            break
-          }
-      }
-      if (k < 0) dist = dist :+ expressions(i)
-    }
-    dist
-  }
-}
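For context: `CHAggUtil.distinctIgnoreQualifier`, deleted above, kept the first occurrence of each grouping expression while comparing only name, exprId, data type and nullability, whereas plain `Seq.distinct` relies on full expression equality, which for Spark's `AttributeReference` also takes the qualifier into account. A minimal sketch of the behavioral difference — the `dedupIgnoreQualifier` helper and the sample attributes below are illustrative, not part of this patch:

import org.apache.spark.sql.catalyst.expressions.{AttributeReference, NamedExpression}
import org.apache.spark.sql.types.{DataType, StringType}

import scala.collection.mutable

// Illustrative stand-in for the removed helper: keep the first occurrence,
// treating two attributes as equal when name, exprId, dataType and nullable
// all match, regardless of qualifier.
def dedupIgnoreQualifier(exprs: Seq[NamedExpression]): Seq[NamedExpression] = {
  val seen = mutable.HashSet.empty[(String, Long, DataType, Boolean)]
  exprs.filter(e => seen.add((e.name, e.exprId.id, e.dataType, e.nullable)))
}

val a = AttributeReference("day1", StringType)() // fresh exprId, no qualifier
val b = a.withQualifier(Seq("t2"))               // same exprId, different qualifier

Seq(a, b).distinct.size              // 2: equality sees the differing qualifier
dedupIgnoreQualifier(Seq(a, b)).size // 1: the qualifier is ignored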
diff --git a/backends-clickhouse/src/test/resources/csv-data/default/TEST_MEASURE.csv b/backends-clickhouse/src/test/resources/csv-data/default/TEST_MEASURE.csv
new file mode 100755
index 000000000000..ab6a2e4a41f7
--- /dev/null
+++ b/backends-clickhouse/src/test/resources/csv-data/default/TEST_MEASURE.csv
@@ -0,0 +1,18 @@
+10000000157,132342342,124123,3123,22.334,1234.244,1434.242343,1,1,1,''ATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOM','ATOM','ATOM',12,2013-3-31,2013-3-31,true,1
+10000000157,132322342,14123,313,12.34,124.44,14.242343,2,7,1,'FT','FT','FT',2,2014-3-31,2012-3-21,true,1
+10000000158,332342342,1241,31233,29.334,123422.244,1434.24222343,5,11,3,'中国','中国','中国',12,2012-3-31,2012-3-31,true,1
+10000000158,,,,,,,,,,,,,,,2012-3-21 10:10:10.789,,
+10000000159,,,,,,,,,,,,,,,,,
+10000000160,,,,,,,,,,,,,,,,,
+10000000160,,,,,,,,,,,,,,,,,
+10000000161,332342342,1241,31233,29.334,123422.244,1434.24222343,5,11,3,'中国','中国','中国',12,2012-3-31,2012-3-31,true,999.99
+10000000162,332342342,1241,31233,29.334,123422.244,1434.24222343,5,11,3,'中国','中国','中国',12,2012-3-31,2012-3-31,true,999.99
+10000000163,332342342,1241,31233,29.334,123422.244,1434.24222343,5,11,3,'中国','中国','中国',12,2012-3-31,2012-3-31,true,999.99
+10000000164,332342342,1241,10,29.334,123422.244,1434.24222343,5,11,3,'中国','中国','中国',12,2014-4-1,2012-3-31,true,10.11
+10000000165,332342342,1241,11,29.334,123422.244,1434.24222343,5,11,3,'中国','中国','中国',12,2014-4-1,2012-3-31,true,10.12
+10000000165,332342342,1241,11,29.334,123422.244,1434.24222343,5,11,3,'中国','中国','中国',12,2014-4-2,2012-3-31,true,10.11
+10000000166,332342342,1241,12,29.334,123422.244,1434.24222343,5,11,3,'中国','中国','中国',12,2014-4-2,2012-3-31,true,10.11
+10000000167,332342342,1241,17,29.334,123422.244,1434.24222343,5,11,3,'中国','中国','中国',12,2014-4-2,2012-3-31,true,10.13
+10000000168,332342342,1241,17,29.334,123422.244,1434.24222343,5,11,3,Ch_na,'中国','中国',12,2014-4-2,2012-3-31,true,10.13
+10000000169,332342342,1241,17,29.334,123422.244,1434.24222343,5,11,3,Ch%na,'中国','中国',12,2014-4-2,2012-3-31,true,10.13
+10000000170,332342342,1241,17,29.334,123422.244,1434.24222343,5,11,3,China,'中国','中国',12,2014-4-2,2012-3-31,true,10.13
diff --git a/backends-clickhouse/src/test/resources/csv-data/default/TEST_MEASURE1.csv b/backends-clickhouse/src/test/resources/csv-data/default/TEST_MEASURE1.csv
new file mode 100755
index 000000000000..4fdc491de65f
--- /dev/null
+++ b/backends-clickhouse/src/test/resources/csv-data/default/TEST_MEASURE1.csv
@@ -0,0 +1,4 @@
+10000000157,132342342,124123,3123,22.334,1234.244,1434.242343,1,1,1,''ATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOMATOM','ATOM','ATOM',12,2013-3-31,2013-3-31,true
+10000000158,,,,,,,,,,,,,,,,
+10000000159,,,,,,,,,,,,,,,,
+10000000160,,,,,,,,,,,,,,,,
\ No newline at end of file
diff --git a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseFileFormatSuite.scala b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseFileFormatSuite.scala
index bb99c6bd1ef0..348b8c85200b 100644
--- a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseFileFormatSuite.scala
+++ b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseFileFormatSuite.scala
@@ -1301,6 +1301,95 @@
       _ => {}
     )
   }
+  test("GLUTEN-7367: Memory limit exceeded") {
+    val file_TEST_MEASURE = csvDataPath + "/default/TEST_MEASURE.csv"
+    val TEST_MEASURE = StructType.apply(
+      Seq(
+        StructField.apply("ID1", LongType, nullable = false),
+        StructField.apply("ID2", LongType, nullable = false),
+        StructField.apply("ID3", LongType, nullable = false),
+        StructField.apply("ID4", IntegerType, nullable = false),
+        StructField.apply("PRICE1", FloatType, nullable = false),
+        StructField.apply("PRICE2", DoubleType, nullable = false),
+        StructField.apply("PRICE3", DecimalType(19, 4), nullable = false),
+        StructField.apply("PRICE5", ShortType, nullable = false),
+        StructField.apply("PRICE6", ByteType, nullable = false),
+        StructField.apply("PRICE7", ShortType, nullable = false),
+        StructField.apply("NAME1", StringType, nullable = true),
+        StructField.apply("NAME2", StringType, nullable = true),
+        StructField.apply("NAME3", StringType, nullable = true),
+        StructField.apply("NAME4", ByteType, nullable = false),
+        StructField.apply("TIME1", DateType, nullable = false),
+        StructField.apply("TIME2", TimestampType, nullable = false),
+        StructField.apply("FLAG", BooleanType, nullable = false)
+      ))
+    spark.read
+      .schema(TEST_MEASURE)
+      .csv(file_TEST_MEASURE)
+      .toDF()
+      .createTempView("TEST_MEASURE")
+    val file_TEST_MEASURE1 = csvDataPath + "/default/TEST_MEASURE1.csv"
+    val TEST_MEASURE1 = StructType.apply(
+      Seq(
+        StructField.apply("ID1", LongType, nullable = false),
+        StructField.apply("ID2", LongType, nullable = false),
+        StructField.apply("ID3", LongType, nullable = false),
+        StructField.apply("ID4", IntegerType, nullable = false),
+        StructField.apply("PRICE1", FloatType, nullable = false),
+        StructField.apply("PRICE2", DoubleType, nullable = false),
+        StructField.apply("PRICE3", DecimalType(19, 4), nullable = false),
+        StructField.apply("PRICE5", ShortType, nullable = false),
+        StructField.apply("PRICE6", ByteType, nullable = false),
+        StructField.apply("PRICE7", ShortType, nullable = false),
+        StructField.apply("NAME1", StringType, nullable = false),
+        StructField.apply("NAME2", StringType, nullable = false),
+        StructField.apply("NAME3", StringType, nullable = false),
+        StructField.apply("NAME4", ByteType, nullable = false),
+        StructField.apply("TIME1", DateType, nullable = false),
+        StructField.apply("TIME2", TimestampType, nullable = false),
+        StructField.apply("FLAG", BooleanType, nullable = false)
+      ))
+    spark.read
+      .schema(TEST_MEASURE1)
+      .csv(file_TEST_MEASURE1)
+      .toDF()
+      .createTempView("TEST_MEASURE1")
+
+    withSQLConf(
+      (CHConf.runtimeSettings("use_excel_serialization"), "false"),
+      ("spark.gluten.sql.text.input.empty.as.default", "true")) {
+      compareResultsAgainstVanillaSpark(
+        """
+          | select * from TEST_MEASURE
+          |""".stripMargin,
+        compareResult = true,
+        _ => {}
+      )
+
+      compareResultsAgainstVanillaSpark(
+        """
+          | select * from TEST_MEASURE1
+          |""".stripMargin,
+        compareResult = true,
+        _ => {}
+      )
+
+      val sqlStr =
+        """select `TEST_MEASURE`.`ID1`,
+          |  count(distinct `TEST_MEASURE`.`ID1`, `TEST_MEASURE`.`ID2`, `TEST_MEASURE`.`ID3`,
+          |    `TEST_MEASURE`.`ID4`,`TEST_MEASURE`.`PRICE1`, `TEST_MEASURE`.`PRICE2`,
+          |    `TEST_MEASURE`.`PRICE3`, `TEST_MEASURE`.`PRICE5`,`TEST_MEASURE`.`PRICE6`,
+          |    `TEST_MEASURE`.`PRICE7`, `TEST_MEASURE`.`NAME1`, `TEST_MEASURE`.`NAME2`,
+          |    `TEST_MEASURE`.`NAME3`, `TEST_MEASURE`.`NAME4`, `TEST_MEASURE`.`TIME1`,
+          |    `TEST_MEASURE`.`TIME2`,`TEST_MEASURE`.`FLAG`),
+          |  1
+          |from `TEST_MEASURE`
+          |  left join `TEST_MEASURE1` on `TEST_MEASURE`.`ID1` = `TEST_MEASURE1`.`ID1`
+          |group by `TEST_MEASURE`.`ID1`""".stripMargin
+
+      compareResultsAgainstVanillaSpark(sqlStr, compareResult = true, _ => {})
+    }
+  }
 
   test("issues-3609 int read test") {
     withSQLConf((CHConf.runtimeSettings("use_excel_serialization.number_force"), "false")) {
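The query guarded by this test is a multi-column `count(distinct ...)` evaluated on top of a left join and grouped by the join key. A minimal, backend-agnostic sketch of the same query shape on plain Spark — the local session and the `m`/`m1` tables are illustrative, not part of the suite:

import org.apache.spark.sql.SparkSession

val spark = SparkSession.builder().master("local[2]").appName("gluten-7367-shape").getOrCreate()

// Two small tables standing in for TEST_MEASURE and TEST_MEASURE1.
spark.range(10).selectExpr("id", "id % 3 as v").createOrReplaceTempView("m")
spark.range(5).createOrReplaceTempView("m1")

// Same shape as the regression query: group by the join key and run a
// multi-argument count(distinct ...) over columns of the left-hand side.
spark.sql(
  """select m.id, count(distinct m.id, m.v), 1
    |from m left join m1 on m.id = m1.id
    |group by m.id""".stripMargin).show()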
StructField.apply("PRICE2", DoubleType, nullable = false), + StructField.apply("PRICE3", DecimalType(19, 4), nullable = false), + StructField.apply("PRICE5", ShortType, nullable = false), + StructField.apply("PRICE6", ByteType, nullable = false), + StructField.apply("PRICE7", ShortType, nullable = false), + StructField.apply("NAME1", StringType, nullable = false), + StructField.apply("NAME2", StringType, nullable = false), + StructField.apply("NAME3", StringType, nullable = false), + StructField.apply("NAME4", ByteType, nullable = false), + StructField.apply("TIME1", DateType, nullable = false), + StructField.apply("TIME2", TimestampType, nullable = false), + StructField.apply("FLAG", BooleanType, nullable = false) + )) + spark.read + .schema(TEST_MEASURE1) + .csv(file_TEST_MEASURE1) + .toDF() + .createTempView("TEST_MEASURE1") + + withSQLConf( + (CHConf.runtimeSettings("use_excel_serialization"), "false"), + ("spark.gluten.sql.text.input.empty.as.default", "true")) { + compareResultsAgainstVanillaSpark( + """ + | select * from TEST_MEASURE + |""".stripMargin, + compareResult = true, + _ => {} + ) + + compareResultsAgainstVanillaSpark( + """ + | select * from TEST_MEASURE1 + |""".stripMargin, + compareResult = true, + _ => {} + ) + + val sqlStr = + """select `TEST_MEASURE`.`ID1`, + | count(distinct `TEST_MEASURE`.`ID1`, `TEST_MEASURE`.`ID2`, `TEST_MEASURE`.`ID3`, + | `TEST_MEASURE`.`ID4`,`TEST_MEASURE`.`PRICE1`, `TEST_MEASURE`.`PRICE2`, + | `TEST_MEASURE`.`PRICE3`, `TEST_MEASURE`.`PRICE5`,`TEST_MEASURE`.`PRICE6`, + | `TEST_MEASURE`.`PRICE7`, `TEST_MEASURE`.`NAME1`, `TEST_MEASURE`.`NAME2`, + | `TEST_MEASURE`.`NAME3`, `TEST_MEASURE`.`NAME4`, `TEST_MEASURE`.`TIME1`, + | `TEST_MEASURE`.`TIME2`,`TEST_MEASURE`.`FLAG`), + | 1 + |from `TEST_MEASURE` + | left join `TEST_MEASURE1` on `TEST_MEASURE`.`ID1` = `TEST_MEASURE1`.`ID1` + |group by `TEST_MEASURE`.`ID1`""".stripMargin + + compareResultsAgainstVanillaSpark(sqlStr, compareResult = true, _ => {}) + } + } test("issues-3609 int read test") { withSQLConf((CHConf.runtimeSettings("use_excel_serialization.number_force"), "false")) { diff --git a/gluten-ut/spark32/src/test/scala/org/apache/spark/sql/hive/execution/GlutenHiveSQLQueryCHSuite.scala b/gluten-ut/spark32/src/test/scala/org/apache/spark/sql/hive/execution/GlutenHiveSQLQueryCHSuite.scala index 958dbf6397bc..e7d573ca5e7d 100644 --- a/gluten-ut/spark32/src/test/scala/org/apache/spark/sql/hive/execution/GlutenHiveSQLQueryCHSuite.scala +++ b/gluten-ut/spark32/src/test/scala/org/apache/spark/sql/hive/execution/GlutenHiveSQLQueryCHSuite.scala @@ -122,50 +122,4 @@ class GlutenHiveSQLQueryCHSuite extends GlutenHiveSQLQuerySuiteBase { ignoreIfNotExists = true, purge = false) } - - testGluten("GLUTEN-7096: Same names in group by may cause exception") { - sql("create table if not exists test_7096 (day string, rtime int, uid string, owner string)") - sql("insert into test_7096 values ('2024-09-01', 123, 'user1', 'owner1')") - sql("insert into test_7096 values ('2024-09-01', 123, 'user1', 'owner1')") - sql("insert into test_7096 values ('2024-09-02', 567, 'user2', 'owner2')") - val query = - """ - |select days, rtime, uid, owner, day1 - |from ( - | select day1 as days, rtime, uid, owner, day1 - | from ( - | select distinct coalesce(day, "today") as day1, rtime, uid, owner - | from test_7096 where day = '2024-09-01' - | )) group by days, rtime, uid, owner, day1 - |""".stripMargin - val df = sql(query) - checkAnswer(df, Seq(Row("2024-09-01", 123, "user1", "owner1", "2024-09-01"))) - 
diff --git a/gluten-ut/spark33/src/test/scala/org/apache/spark/sql/hive/execution/GlutenHiveSQLQueryCHSuite.scala b/gluten-ut/spark33/src/test/scala/org/apache/spark/sql/hive/execution/GlutenHiveSQLQueryCHSuite.scala
index 958dbf6397bc..e7d573ca5e7d 100644
--- a/gluten-ut/spark33/src/test/scala/org/apache/spark/sql/hive/execution/GlutenHiveSQLQueryCHSuite.scala
+++ b/gluten-ut/spark33/src/test/scala/org/apache/spark/sql/hive/execution/GlutenHiveSQLQueryCHSuite.scala
@@ -122,50 +122,4 @@ class GlutenHiveSQLQueryCHSuite extends GlutenHiveSQLQuerySuiteBase {
       ignoreIfNotExists = true,
       purge = false)
   }
-
-  testGluten("GLUTEN-7096: Same names in group by may cause exception") {
-    sql("create table if not exists test_7096 (day string, rtime int, uid string, owner string)")
-    sql("insert into test_7096 values ('2024-09-01', 123, 'user1', 'owner1')")
-    sql("insert into test_7096 values ('2024-09-01', 123, 'user1', 'owner1')")
-    sql("insert into test_7096 values ('2024-09-02', 567, 'user2', 'owner2')")
-    val query =
-      """
-        |select days, rtime, uid, owner, day1
-        |from (
-        |  select day1 as days, rtime, uid, owner, day1
-        |  from (
-        |    select distinct coalesce(day, "today") as day1, rtime, uid, owner
-        |    from test_7096 where day = '2024-09-01'
-        |  )) group by days, rtime, uid, owner, day1
-        |""".stripMargin
-    val df = sql(query)
-    checkAnswer(df, Seq(Row("2024-09-01", 123, "user1", "owner1", "2024-09-01")))
-    spark.sessionState.catalog.dropTable(
-      TableIdentifier("test_7096"),
-      ignoreIfNotExists = true,
-      purge = false)
-  }
-
-  testGluten("GLUTEN-7096: Same names with different qualifier in group by may cause exception") {
-    sql("create table if not exists test_7096 (day string, rtime int, uid string, owner string)")
-    sql("insert into test_7096 values ('2024-09-01', 123, 'user1', 'owner1')")
-    sql("insert into test_7096 values ('2024-09-01', 123, 'user1', 'owner1')")
-    sql("insert into test_7096 values ('2024-09-02', 567, 'user2', 'owner2')")
-    val query =
-      """
-        |select days, rtime, uid, owner, day1
-        |from (
-        |  select day1 as days, rtime, uid, owner, day1
-        |  from (
-        |    select distinct coalesce(day, "today") as day1, rtime, uid, owner
-        |    from test_7096 where day = '2024-09-01'
-        |  ) t1 ) t2 group by days, rtime, uid, owner, day1
-        |""".stripMargin
-    val df = sql(query)
-    checkAnswer(df, Seq(Row("2024-09-01", 123, "user1", "owner1", "2024-09-01")))
-    spark.sessionState.catalog.dropTable(
-      TableIdentifier("test_7096"),
-      ignoreIfNotExists = true,
-      purge = false)
-  }
 }
diff --git a/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/hive/execution/GlutenHiveSQLQueryCHSuite.scala b/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/hive/execution/GlutenHiveSQLQueryCHSuite.scala
index 958dbf6397bc..e7d573ca5e7d 100644
--- a/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/hive/execution/GlutenHiveSQLQueryCHSuite.scala
+++ b/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/hive/execution/GlutenHiveSQLQueryCHSuite.scala
@@ -122,50 +122,4 @@ class GlutenHiveSQLQueryCHSuite extends GlutenHiveSQLQuerySuiteBase {
       ignoreIfNotExists = true,
       purge = false)
   }
-
-  testGluten("GLUTEN-7096: Same names in group by may cause exception") {
-    sql("create table if not exists test_7096 (day string, rtime int, uid string, owner string)")
-    sql("insert into test_7096 values ('2024-09-01', 123, 'user1', 'owner1')")
-    sql("insert into test_7096 values ('2024-09-01', 123, 'user1', 'owner1')")
-    sql("insert into test_7096 values ('2024-09-02', 567, 'user2', 'owner2')")
-    val query =
-      """
-        |select days, rtime, uid, owner, day1
-        |from (
-        |  select day1 as days, rtime, uid, owner, day1
-        |  from (
-        |    select distinct coalesce(day, "today") as day1, rtime, uid, owner
-        |    from test_7096 where day = '2024-09-01'
-        |  )) group by days, rtime, uid, owner, day1
-        |""".stripMargin
-    val df = sql(query)
-    checkAnswer(df, Seq(Row("2024-09-01", 123, "user1", "owner1", "2024-09-01")))
-    spark.sessionState.catalog.dropTable(
-      TableIdentifier("test_7096"),
-      ignoreIfNotExists = true,
-      purge = false)
-  }
-
-  testGluten("GLUTEN-7096: Same names with different qualifier in group by may cause exception") {
-    sql("create table if not exists test_7096 (day string, rtime int, uid string, owner string)")
-    sql("insert into test_7096 values ('2024-09-01', 123, 'user1', 'owner1')")
-    sql("insert into test_7096 values ('2024-09-01', 123, 'user1', 'owner1')")
-    sql("insert into test_7096 values ('2024-09-02', 567, 'user2', 'owner2')")
-    val query =
-      """
-        |select days, rtime, uid, owner, day1
-        |from (
-        |  select day1 as days, rtime, uid, owner, day1
-        |  from (
-        |    select distinct coalesce(day, "today") as day1, rtime, uid, owner
-        |    from test_7096 where day = '2024-09-01'
-        |  ) t1 ) t2 group by days, rtime, uid, owner, day1
-        |""".stripMargin
-    val df = sql(query)
-    checkAnswer(df, Seq(Row("2024-09-01", 123, "user1", "owner1", "2024-09-01")))
-    spark.sessionState.catalog.dropTable(
-      TableIdentifier("test_7096"),
-      ignoreIfNotExists = true,
-      purge = false)
-  }
 }
'owner2')") - val query = - """ - |select days, rtime, uid, owner, day1 - |from ( - | select day1 as days, rtime, uid, owner, day1 - | from ( - | select distinct coalesce(day, "today") as day1, rtime, uid, owner - | from test_7096 where day = '2024-09-01' - | )) group by days, rtime, uid, owner, day1 - |""".stripMargin - val df = sql(query) - checkAnswer(df, Seq(Row("2024-09-01", 123, "user1", "owner1", "2024-09-01"))) - spark.sessionState.catalog.dropTable( - TableIdentifier("test_7096"), - ignoreIfNotExists = true, - purge = false) - } - - testGluten("GLUTEN-7096: Same names with different qualifier in group by may cause exception") { - sql("create table if not exists test_7096 (day string, rtime int, uid string, owner string)") - sql("insert into test_7096 values ('2024-09-01', 123, 'user1', 'owner1')") - sql("insert into test_7096 values ('2024-09-01', 123, 'user1', 'owner1')") - sql("insert into test_7096 values ('2024-09-02', 567, 'user2', 'owner2')") - val query = - """ - |select days, rtime, uid, owner, day1 - |from ( - | select day1 as days, rtime, uid, owner, day1 - | from ( - | select distinct coalesce(day, "today") as day1, rtime, uid, owner - | from test_7096 where day = '2024-09-01' - | ) t1 ) t2 group by days, rtime, uid, owner, day1 - |""".stripMargin - val df = sql(query) - checkAnswer(df, Seq(Row("2024-09-01", 123, "user1", "owner1", "2024-09-01"))) - spark.sessionState.catalog.dropTable( - TableIdentifier("test_7096"), - ignoreIfNotExists = true, - purge = false) - } }