From 1ef1534d94c2aeda7a2d13c429b5fb56f3d23597 Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Mon, 2 Sep 2024 23:17:30 +0800 Subject: [PATCH 1/3] fix cse alias issues --- .../extension/CommonSubexpressionEliminateRule.scala | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/backends-clickhouse/src/main/scala/org/apache/gluten/extension/CommonSubexpressionEliminateRule.scala b/backends-clickhouse/src/main/scala/org/apache/gluten/extension/CommonSubexpressionEliminateRule.scala index a3b74366fc7b..2dbe3ed29176 100644 --- a/backends-clickhouse/src/main/scala/org/apache/gluten/extension/CommonSubexpressionEliminateRule.scala +++ b/backends-clickhouse/src/main/scala/org/apache/gluten/extension/CommonSubexpressionEliminateRule.scala @@ -121,7 +121,12 @@ class CommonSubexpressionEliminateRule(session: SparkSession, conf: SQLConf) if (expr.find(_.isInstanceOf[AggregateExpression]).isDefined) { addToEquivalentExpressions(expr, equivalentExpressions) } else { - equivalentExpressions.addExprTree(expr) + expr match { + case alias: Alias => + equivalentExpressions.addExprTree(alias.child) + case _ => + equivalentExpressions.addExprTree(expr) + } } }) From 28ea9b402bd6a76968c62c627ab574bc1167d249 Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Tue, 3 Sep 2024 10:57:26 +0800 Subject: [PATCH 2/3] fix issue https://github.com/apache/incubator-gluten/issues/7054 --- .../CommonSubexpressionEliminateRule.scala | 5 ++ .../hive/GlutenClickHouseHiveTableSuite.scala | 84 ++++++++++++++++--- 2 files changed, 78 insertions(+), 11 deletions(-) diff --git a/backends-clickhouse/src/main/scala/org/apache/gluten/extension/CommonSubexpressionEliminateRule.scala b/backends-clickhouse/src/main/scala/org/apache/gluten/extension/CommonSubexpressionEliminateRule.scala index 2dbe3ed29176..52e278b3dace 100644 --- a/backends-clickhouse/src/main/scala/org/apache/gluten/extension/CommonSubexpressionEliminateRule.scala +++ b/backends-clickhouse/src/main/scala/org/apache/gluten/extension/CommonSubexpressionEliminateRule.scala @@ -28,6 +28,11 @@ import org.apache.spark.sql.internal.SQLConf import scala.collection.mutable +// If you want to debug CommonSubexpressionEliminateRule, you can: +// 1. replace all `logTrace` to `logError` +// 2. append two options to spark config +// --conf spark.sql.planChangeLog.level=error +// --conf spark.sql.planChangeLog.batches=all class CommonSubexpressionEliminateRule(session: SparkSession, conf: SQLConf) extends Rule[LogicalPlan] with Logging { diff --git a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/hive/GlutenClickHouseHiveTableSuite.scala b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/hive/GlutenClickHouseHiveTableSuite.scala index f165d7aef69c..5650de5f06eb 100644 --- a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/hive/GlutenClickHouseHiveTableSuite.scala +++ b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/hive/GlutenClickHouseHiveTableSuite.scala @@ -985,19 +985,19 @@ class GlutenClickHouseHiveTableSuite } } - test("GLUTEN-4333: fix CSE in aggregate operator") { - def checkOperatorCount[T <: TransformSupport](count: Int)(df: DataFrame)(implicit - tag: ClassTag[T]): Unit = { - if (sparkVersion.equals("3.3")) { - assert( - getExecutedPlan(df).count( - plan => { - plan.getClass == tag.runtimeClass - }) == count, - s"executed plan: ${getExecutedPlan(df)}") - } + def checkOperatorCount[T <: TransformSupport](count: Int)(df: DataFrame)(implicit + tag: ClassTag[T]): Unit = { + if (sparkVersion.equals("3.3")) { + assert( + getExecutedPlan(df).count( + plan => { + plan.getClass == tag.runtimeClass + }) == count, + s"executed plan: ${getExecutedPlan(df)}") } + } + test("GLUTEN-4333: fix CSE in aggregate operator") { val createTableSql = """ |CREATE TABLE `test_cse`( @@ -1262,4 +1262,66 @@ class GlutenClickHouseHiveTableSuite compareResultsAgainstVanillaSpark(selectSql, true, _ => {}) sql(s"drop table if exists $tableName") } + + test("GLUTEN-7054: Fix exception when CSE meets common alias expression") { + val createTableSql = """ + |CREATE TABLE test_tbl_7054 ( + | day STRING, + | event_id STRING, + | event STRUCT< + | event_info: MAP + | > + |); + |""".stripMargin + + val insertDataSql = """ + |INSERT INTO test_tbl_7054 + |VALUES + | ('2024-08-27', '011441004', + | STRUCT(MAP('type', '1', 'action', '8', 'value_vmoney', '100'))), + | ('2024-08-27', '011441004', + | STRUCT(MAP('type', '2', 'action', '8', 'value_vmoney', '200'))), + | ('2024-08-27', '011441004', + | STRUCT(MAP('type', '4', 'action', '8', 'value_vmoney', '300'))); + |""".stripMargin + + val selectSql = """ + |SELECT + | COALESCE(day, 'all') AS daytime, + | COALESCE(type, 'all') AS type, + | COALESCE(value_money, 'all') AS value_vmoney, + | SUM(CASE + | WHEN type IN (1, 2) AND action = 8 THEN value_vmoney + | ELSE 0 + | END) / 60 AS total_value_vmoney + |FROM ( + | SELECT + | day, + | type, + | NVL(CAST(value_vmoney AS BIGINT), 0) AS value_money, + | action, + | type, + | CAST(value_vmoney AS BIGINT) AS value_vmoney + | FROM ( + | SELECT + | day, + | event.event_info["type"] AS type, + | event.event_info["action"] AS action, + | event.event_info["value_vmoney"] AS value_vmoney + | FROM test_tbl_7054 + | WHERE + | day = '2024-08-27' + | AND event_id = '011441004' + | AND event.event_info["type"] IN (1, 2, 4) + | ) a + |) b + |GROUP BY + | day, type, value_money + |""".stripMargin + + spark.sql(createTableSql) + spark.sql(insertDataSql) + runQueryAndCompare(selectSql)(df => checkOperatorCount[ProjectExecTransformer](2)(df)) + spark.sql("DROP TABLE test_tbl_7054") + } } From e83f4fdd38a4a4fca086a84cabdece1099c49b81 Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Tue, 3 Sep 2024 11:12:18 +0800 Subject: [PATCH 3/3] fix uts --- .../execution/hive/GlutenClickHouseHiveTableSuite.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/hive/GlutenClickHouseHiveTableSuite.scala b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/hive/GlutenClickHouseHiveTableSuite.scala index 5650de5f06eb..cbc3aed3607d 100644 --- a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/hive/GlutenClickHouseHiveTableSuite.scala +++ b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/hive/GlutenClickHouseHiveTableSuite.scala @@ -1271,7 +1271,7 @@ class GlutenClickHouseHiveTableSuite | event STRUCT< | event_info: MAP | > - |); + |) STORED AS PARQUET; |""".stripMargin val insertDataSql = """ @@ -1321,7 +1321,7 @@ class GlutenClickHouseHiveTableSuite spark.sql(createTableSql) spark.sql(insertDataSql) - runQueryAndCompare(selectSql)(df => checkOperatorCount[ProjectExecTransformer](2)(df)) + runQueryAndCompare(selectSql)(df => checkOperatorCount[ProjectExecTransformer](3)(df)) spark.sql("DROP TABLE test_tbl_7054") } }