From 65bb8343be4a5bbd0da3bbdc85360f97f04f430e Mon Sep 17 00:00:00 2001
From: Chang Chen
Date: Mon, 17 Jun 2024 16:39:23 +0800
Subject: [PATCH 1/2] Refactor GlutenV1WriteCommandSuite and GlutenInsertSuite,
 so we can compile spark-ut with spark-3.5 profile

---
 gluten-ut/spark35/pom.xml                     | 44 +++++++++++++++++++
 .../GlutenColumnarWriteTestSupport.scala      | 26 +++++++++++
 .../GlutenColumnarWriteTestSupport.scala      | 27 ++++++++++++
 .../GlutenV1WriteCommandSuite.scala           | 12 ++---
 .../spark/sql/sources/GlutenInsertSuite.scala | 24 +++++-----
 5 files changed, 116 insertions(+), 17 deletions(-)
 create mode 100644 gluten-ut/spark35/src/test/backends-clickhouse/org/apache/gluten/GlutenColumnarWriteTestSupport.scala
 create mode 100644 gluten-ut/spark35/src/test/backends-velox/org/apache/gluten/GlutenColumnarWriteTestSupport.scala

diff --git a/gluten-ut/spark35/pom.xml b/gluten-ut/spark35/pom.xml
index cf2129389a6e..2bf1c93a0052 100644
--- a/gluten-ut/spark35/pom.xml
+++ b/gluten-ut/spark35/pom.xml
@@ -63,6 +63,28 @@
           <scope>test</scope>
         </dependency>
       </dependencies>
+      <build>
+        <plugins>
+          <plugin>
+            <groupId>org.codehaus.mojo</groupId>
+            <artifactId>build-helper-maven-plugin</artifactId>
+            <executions>
+              <execution>
+                <id>add-sources</id>
+                <phase>generate-sources</phase>
+                <goals>
+                  <goal>add-test-source</goal>
+                </goals>
+                <configuration>
+                  <sources>
+                    <source>src/test/backends-clickhouse</source>
+                  </sources>
+                </configuration>
+              </execution>
+            </executions>
+          </plugin>
+        </plugins>
+      </build>
     </profile>
     <profile>
       <id>backends-velox</id>
@@ -155,6 +177,28 @@
           <version>2.19.0</version>
         </dependency>
       </dependencies>
+      <build>
+        <plugins>
+          <plugin>
+            <groupId>org.codehaus.mojo</groupId>
+            <artifactId>build-helper-maven-plugin</artifactId>
+            <executions>
+              <execution>
+                <id>add-sources</id>
+                <phase>generate-sources</phase>
+                <goals>
+                  <goal>add-test-source</goal>
+                </goals>
+                <configuration>
+                  <sources>
+                    <source>src/test/backends-velox</source>
+                  </sources>
+                </configuration>
+              </execution>
+            </executions>
+          </plugin>
+        </plugins>
+      </build>
     </profile>
   </profiles>
 </project>
diff --git a/gluten-ut/spark35/src/test/backends-clickhouse/org/apache/gluten/GlutenColumnarWriteTestSupport.scala b/gluten-ut/spark35/src/test/backends-clickhouse/org/apache/gluten/GlutenColumnarWriteTestSupport.scala
new file mode 100644
index 000000000000..43b83afe9af3
--- /dev/null
+++ b/gluten-ut/spark35/src/test/backends-clickhouse/org/apache/gluten/GlutenColumnarWriteTestSupport.scala
@@ -0,0 +1,26 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.gluten
+
+import org.apache.spark.sql.execution.SparkPlan
+
+trait GlutenColumnarWriteTestSupport {
+
+  def checkWriteFilesAndGetChild(sparkPlan: SparkPlan): SparkPlan = {
+    throw new UnsupportedOperationException("Clickhouse Backend does not support write files")
+  }
+}
diff --git a/gluten-ut/spark35/src/test/backends-velox/org/apache/gluten/GlutenColumnarWriteTestSupport.scala b/gluten-ut/spark35/src/test/backends-velox/org/apache/gluten/GlutenColumnarWriteTestSupport.scala
new file mode 100644
index 000000000000..c7ad606bcf8d
--- /dev/null
+++ b/gluten-ut/spark35/src/test/backends-velox/org/apache/gluten/GlutenColumnarWriteTestSupport.scala
@@ -0,0 +1,27 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.gluten
+
+import org.apache.spark.sql.execution.{SparkPlan, VeloxColumnarWriteFilesExec}
+
+trait GlutenColumnarWriteTestSupport {
+
+  def checkWriteFilesAndGetChild(sparkPlan: SparkPlan): SparkPlan = {
+    assert(sparkPlan.isInstanceOf[VeloxColumnarWriteFilesExec])
+    sparkPlan.asInstanceOf[VeloxColumnarWriteFilesExec].child
+  }
+}
diff --git a/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/execution/datasources/GlutenV1WriteCommandSuite.scala b/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/execution/datasources/GlutenV1WriteCommandSuite.scala
index 3d277b94cc3e..fcaf75a4d5c1 100644
--- a/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/execution/datasources/GlutenV1WriteCommandSuite.scala
+++ b/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/execution/datasources/GlutenV1WriteCommandSuite.scala
@@ -16,12 +16,13 @@
  */
 package org.apache.spark.sql.execution.datasources
 
+import org.apache.gluten.GlutenColumnarWriteTestSupport
 import org.apache.gluten.execution.SortExecTransformer
 
 import org.apache.spark.sql.GlutenSQLTestsBaseTrait
 import org.apache.spark.sql.catalyst.expressions.{Ascending, AttributeReference, NullsFirst, SortOrder}
 import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Sort}
-import org.apache.spark.sql.execution.{QueryExecution, SortExec, VeloxColumnarWriteFilesExec}
+import org.apache.spark.sql.execution.{QueryExecution, SortExec}
 import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanExec
 import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.types.{IntegerType, StringType}
@@ -96,7 +97,8 @@ trait GlutenV1WriteCommandSuiteBase extends V1WriteCommandSuiteBase
 class GlutenV1WriteCommandSuite
   extends V1WriteCommandSuite
   with GlutenV1WriteCommandSuiteBase
-  with GlutenSQLTestsBaseTrait {
+  with GlutenSQLTestsBaseTrait
+  with GlutenColumnarWriteTestSupport {
 
   testGluten(
     "SPARK-41914: v1 write with AQE and in-partition sorted - non-string partition column") {
@@ -122,8 +124,7 @@
 
         val executedPlan = FileFormatWriter.executedPlan.get
         val plan = if (enabled) {
-          assert(executedPlan.isInstanceOf[VeloxColumnarWriteFilesExec])
-          executedPlan.asInstanceOf[VeloxColumnarWriteFilesExec].child
+          checkWriteFilesAndGetChild(executedPlan)
         } else {
           executedPlan.transformDown { case a: AdaptiveSparkPlanExec => a.executedPlan }
         }
@@ -204,8 +205,7 @@
 
         val executedPlan = FileFormatWriter.executedPlan.get
         val plan = if (enabled) {
-          assert(executedPlan.isInstanceOf[VeloxColumnarWriteFilesExec])
-          executedPlan.asInstanceOf[VeloxColumnarWriteFilesExec].child
+          checkWriteFilesAndGetChild(executedPlan)
         } else {
           executedPlan.transformDown { case a: AdaptiveSparkPlanExec => a.executedPlan }
         }
diff --git a/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/sources/GlutenInsertSuite.scala b/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/sources/GlutenInsertSuite.scala
index 2814c2e8cba6..468226deed33 100644
--- a/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/sources/GlutenInsertSuite.scala
+++ b/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/sources/GlutenInsertSuite.scala
@@ -16,6 +16,7 @@
  */
 package org.apache.spark.sql.sources
 
+import org.apache.gluten.GlutenColumnarWriteTestSupport
 import org.apache.gluten.execution.SortExecTransformer
 import org.apache.gluten.extension.GlutenPlan
 
@@ -24,7 +25,7 @@ import org.apache.spark.executor.OutputMetrics
 import org.apache.spark.scheduler.{SparkListener, SparkListenerTaskEnd}
 import org.apache.spark.sql._
 import org.apache.spark.sql.catalyst.TableIdentifier
-import org.apache.spark.sql.execution.{CommandResultExec, QueryExecution, VeloxColumnarWriteFilesExec}
+import org.apache.spark.sql.execution.{CommandResultExec, QueryExecution, SparkPlan}
 import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper
 import org.apache.spark.sql.execution.command.DataWritingCommandExec
 import org.apache.spark.sql.execution.metric.SQLMetric
@@ -38,7 +39,8 @@ import java.io.{File, IOException}
 class GlutenInsertSuite
   extends InsertSuite
   with GlutenSQLTestsBaseTrait
-  with AdaptiveSparkPlanHelper {
+  with AdaptiveSparkPlanHelper
+  with GlutenColumnarWriteTestSupport {
 
   override def sparkConf: SparkConf = {
     super.sparkConf.set("spark.sql.leafNodeDefaultParallelism", "1")
@@ -60,13 +62,13 @@ class GlutenInsertSuite
     super.afterAll()
   }
 
-  private def checkAndGetWriteFiles(df: DataFrame): VeloxColumnarWriteFilesExec = {
+  private def checkWriteFilesAndGetChild(df: DataFrame): (SparkPlan, SparkPlan) = {
     val writeFiles = stripAQEPlan(
       df.queryExecution.executedPlan
         .asInstanceOf[CommandResultExec]
         .commandPhysicalPlan).children.head
-    assert(writeFiles.isInstanceOf[VeloxColumnarWriteFilesExec])
-    writeFiles.asInstanceOf[VeloxColumnarWriteFilesExec]
+    val child = checkWriteFilesAndGetChild(writeFiles)
+    (writeFiles, child)
   }
 
   testGluten("insert partition table") {
@@ -97,7 +99,7 @@ class GlutenInsertSuite
      val df =
        spark.sql("INSERT INTO TABLE pt partition(pt='a') SELECT * FROM VALUES(1, 'a'),(2, 'b')")
      spark.sparkContext.listenerBus.waitUntilEmpty()
-      checkAndGetWriteFiles(df)
+      checkWriteFilesAndGetChild(df)
 
      assert(taskMetrics.bytesWritten > 0)
      assert(taskMetrics.recordsWritten == 2)
@@ -135,13 +137,13 @@ class GlutenInsertSuite
   private def validateDynamicPartitionWrite(
       df: DataFrame,
       expectedPartitionNames: Set[String]): Unit = {
-    val writeFiles = checkAndGetWriteFiles(df)
+    val (writeFiles, writeChild) = checkWriteFilesAndGetChild(df)
     assert(
       writeFiles
         .find(_.isInstanceOf[SortExecTransformer])
         .isEmpty)
     // all operators should be transformed
-    assert(writeFiles.child.find(!_.isInstanceOf[GlutenPlan]).isEmpty)
+    assert(writeChild.find(!_.isInstanceOf[GlutenPlan]).isEmpty)
 
     val parts = spark.sessionState.catalog.listPartitionNames(TableIdentifier("pt")).toSet
     assert(parts == expectedPartitionNames)
@@ -209,7 +211,7 @@ class GlutenInsertSuite
      spark.sql("CREATE TABLE t (c1 int, c2 string) USING PARQUET")
      val df =
        spark.sql("INSERT OVERWRITE TABLE t SELECT c1, c2 FROM source SORT BY c1")
-      val writeFiles = checkAndGetWriteFiles(df)
+      val (writeFiles, _) = checkWriteFilesAndGetChild(df)
      assert(writeFiles.find(x => x.isInstanceOf[SortExecTransformer]).isDefined)
      checkAnswer(spark.sql("SELECT * FROM t"), spark.sql("SELECT * FROM source SORT BY c1"))
    }
@@ -244,7 +246,7 @@ class GlutenInsertSuite
      spark.sql("CREATE TABLE t1 USING PARQUET AS SELECT id as c1, id % 3 as c2 FROM range(10)")
      spark.sql("CREATE TABLE t2 (c1 long, c2 long) USING PARQUET")
      val df = spark.sql("INSERT INTO TABLE t2 SELECT c2, count(*) FROM t1 GROUP BY c2")
-      checkAndGetWriteFiles(df)
+      checkWriteFilesAndGetChild(df)
    }
  }
 
@@ -257,7 +259,7 @@ class GlutenInsertSuite
      spark.sql("INSERT INTO TABLE t1 VALUES(1, 1),(2, 2)")
      spark.sql("CREATE TABLE t2 (c1 long, c2 long) USING PARQUET")
      val df = spark.sql("INSERT INTO TABLE t2 SELECT * FROM t1")
-      checkAndGetWriteFiles(df)
+      checkWriteFilesAndGetChild(df)
    }
  }

From 5ca17fca13971d36e3726a8b4ad7675f2be77c81 Mon Sep 17 00:00:00 2001
From: Chang Chen
Date: Mon, 17 Jun 2024 13:31:23 +0800
Subject: [PATCH 2/2] fix warning

---
 .../spark/sql/sources/GlutenInsertSuite.scala | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/sources/GlutenInsertSuite.scala b/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/sources/GlutenInsertSuite.scala
index 468226deed33..084c2faa8c5c 100644
--- a/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/sources/GlutenInsertSuite.scala
+++ b/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/sources/GlutenInsertSuite.scala
@@ -407,7 +407,7 @@ class GlutenInsertSuite
        withTable("t") {
          sql(s"create table t(i boolean) using ${config.dataSource}")
          if (config.useDataFrames) {
-            Seq((false)).toDF.write.insertInto("t")
+            Seq(false).toDF.write.insertInto("t")
          } else {
            sql("insert into t select false")
          }
@@ -422,12 +422,12 @@ class GlutenInsertSuite
    val incompatibleDefault =
      "Failed to execute ALTER TABLE ADD COLUMNS command because the destination " +
        "table column `s` has a DEFAULT value"
-    Seq(Config("parquet"), Config("parquet", true)).foreach {
+    Seq(Config("parquet"), Config("parquet", useDataFrames = true)).foreach {
      config =>
        withTable("t") {
          sql(s"create table t(i boolean) using ${config.dataSource}")
          if (config.useDataFrames) {
-            Seq((false)).toDF.write.insertInto("t")
+            Seq(false).toDF.write.insertInto("t")
          } else {
            sql("insert into t select false")
          }
@@ -454,7 +454,7 @@ class GlutenInsertSuite
        withTable("t") {
          sql(s"create table t(i boolean) using ${config.dataSource}")
          if (config.useDataFrames) {
-            Seq((false)).toDF.write.insertInto("t")
+            Seq(false).toDF.write.insertInto("t")
          } else {
            sql("insert into t select false")
          }
@@ -471,12 +471,12 @@ class GlutenInsertSuite
    val incompatibleDefault =
      "Failed to execute ALTER TABLE ADD COLUMNS command because the destination " +
        "table column `s` has a DEFAULT value"
-    Seq(Config("parquet"), Config("parquet", true)).foreach {
+    Seq(Config("parquet"), Config("parquet", useDataFrames = true)).foreach {
      config =>
        withTable("t") {
          sql(s"create table t(i boolean) using ${config.dataSource}")
          if (config.useDataFrames) {
-            Seq((false)).toDF.write.insertInto("t")
+            Seq(false).toDF.write.insertInto("t")
          } else {
            sql("insert into t select false")
          }
@@ -503,7 +503,7 @@ class GlutenInsertSuite
        withTable("t") {
          sql(s"create table t(i boolean) using ${config.dataSource}")
          if (config.useDataFrames) {
-            Seq((false)).toDF.write.insertInto("t")
+            Seq(false).toDF.write.insertInto("t")
          } else {
            sql("insert into t select false")
          }
@@ -568,12 +568,12 @@ class GlutenInsertSuite
    val incompatibleDefault =
      "Failed to execute ALTER TABLE ADD COLUMNS command because the destination " +
        "table column `s` has a DEFAULT value"
-    Seq(Config("parquet"), Config("parquet", true)).foreach {
+    Seq(Config("parquet"), Config("parquet", useDataFrames = true)).foreach {
      config =>
        withTable("t") {
          sql(s"create table t(i boolean) using ${config.dataSource}")
          if (config.useDataFrames) {
-            Seq((false)).toDF.write.insertInto("t")
+            Seq(false).toDF.write.insertInto("t")
          } else {
            sql("insert into t select false")
          }