From 65bb8343be4a5bbd0da3bbdc85360f97f04f430e Mon Sep 17 00:00:00 2001
From: Chang Chen
Date: Mon, 17 Jun 2024 16:39:23 +0800
Subject: [PATCH 1/2] Refactor GlutenV1WriteCommandSuite and GlutenInsertSuite,
 so we can compile spark-ut with spark-3.5 profile

---
 gluten-ut/spark35/pom.xml                     | 44 +++++++++++++++++++
 .../GlutenColumnarWriteTestSupport.scala      | 26 +++++++++++
 .../GlutenColumnarWriteTestSupport.scala      | 27 ++++++++++++
 .../GlutenV1WriteCommandSuite.scala           | 12 ++---
 .../spark/sql/sources/GlutenInsertSuite.scala | 24 +++++-----
 5 files changed, 116 insertions(+), 17 deletions(-)
 create mode 100644 gluten-ut/spark35/src/test/backends-clickhouse/org/apache/gluten/GlutenColumnarWriteTestSupport.scala
 create mode 100644 gluten-ut/spark35/src/test/backends-velox/org/apache/gluten/GlutenColumnarWriteTestSupport.scala

diff --git a/gluten-ut/spark35/pom.xml b/gluten-ut/spark35/pom.xml
index cf2129389a6e..2bf1c93a0052 100644
--- a/gluten-ut/spark35/pom.xml
+++ b/gluten-ut/spark35/pom.xml
@@ -63,6 +63,28 @@
           <scope>test</scope>
         </dependency>
       </dependencies>
+      <build>
+        <plugins>
+          <plugin>
+            <groupId>org.codehaus.mojo</groupId>
+            <artifactId>build-helper-maven-plugin</artifactId>
+            <executions>
+              <execution>
+                <id>add-sources</id>
+                <phase>generate-sources</phase>
+                <goals>
+                  <goal>add-test-source</goal>
+                </goals>
+                <configuration>
+                  <sources>
+                    <source>src/test/backends-clickhouse</source>
+                  </sources>
+                </configuration>
+              </execution>
+            </executions>
+          </plugin>
+        </plugins>
+      </build>
     </profile>
     <profile>
       <id>backends-velox</id>
@@ -155,6 +177,28 @@
           <version>2.19.0</version>
         </dependency>
       </dependencies>
+      <build>
+        <plugins>
+          <plugin>
+            <groupId>org.codehaus.mojo</groupId>
+            <artifactId>build-helper-maven-plugin</artifactId>
+            <executions>
+              <execution>
+                <id>add-sources</id>
+                <phase>generate-sources</phase>
+                <goals>
+                  <goal>add-test-source</goal>
+                </goals>
+                <configuration>
+                  <sources>
+                    <source>src/test/backends-velox</source>
+                  </sources>
+                </configuration>
+              </execution>
+            </executions>
+          </plugin>
+        </plugins>
+      </build>
     </profile>
   </profiles>
 </project>
diff --git a/gluten-ut/spark35/src/test/backends-clickhouse/org/apache/gluten/GlutenColumnarWriteTestSupport.scala b/gluten-ut/spark35/src/test/backends-clickhouse/org/apache/gluten/GlutenColumnarWriteTestSupport.scala
new file mode 100644
index 000000000000..43b83afe9af3
--- /dev/null
+++ b/gluten-ut/spark35/src/test/backends-clickhouse/org/apache/gluten/GlutenColumnarWriteTestSupport.scala
@@ -0,0 +1,26 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.gluten
+
+import org.apache.spark.sql.execution.SparkPlan
+
+trait GlutenColumnarWriteTestSupport {
+
+  def checkWriteFilesAndGetChild(sparkPlan: SparkPlan): SparkPlan = {
+    throw new UnsupportedOperationException("Clickhouse Backend does not support write files")
+  }
+}
diff --git a/gluten-ut/spark35/src/test/backends-velox/org/apache/gluten/GlutenColumnarWriteTestSupport.scala b/gluten-ut/spark35/src/test/backends-velox/org/apache/gluten/GlutenColumnarWriteTestSupport.scala
new file mode 100644
index 000000000000..c7ad606bcf8d
--- /dev/null
+++ b/gluten-ut/spark35/src/test/backends-velox/org/apache/gluten/GlutenColumnarWriteTestSupport.scala
@@ -0,0 +1,27 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.gluten
+
+import org.apache.spark.sql.execution.{SparkPlan, VeloxColumnarWriteFilesExec}
+
+trait GlutenColumnarWriteTestSupport {
+
+  def checkWriteFilesAndGetChild(sparkPlan: SparkPlan): SparkPlan = {
+    assert(sparkPlan.isInstanceOf[VeloxColumnarWriteFilesExec])
+    sparkPlan.asInstanceOf[VeloxColumnarWriteFilesExec].child
+  }
+}
diff --git a/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/execution/datasources/GlutenV1WriteCommandSuite.scala b/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/execution/datasources/GlutenV1WriteCommandSuite.scala
index 3d277b94cc3e..fcaf75a4d5c1 100644
--- a/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/execution/datasources/GlutenV1WriteCommandSuite.scala
+++ b/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/execution/datasources/GlutenV1WriteCommandSuite.scala
@@ -16,12 +16,13 @@
  */
 package org.apache.spark.sql.execution.datasources
 
+import org.apache.gluten.GlutenColumnarWriteTestSupport
 import org.apache.gluten.execution.SortExecTransformer
 
 import org.apache.spark.sql.GlutenSQLTestsBaseTrait
 import org.apache.spark.sql.catalyst.expressions.{Ascending, AttributeReference, NullsFirst, SortOrder}
 import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Sort}
-import org.apache.spark.sql.execution.{QueryExecution, SortExec, VeloxColumnarWriteFilesExec}
+import org.apache.spark.sql.execution.{QueryExecution, SortExec}
 import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanExec
 import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.types.{IntegerType, StringType}
@@ -96,7 +97,8 @@ trait GlutenV1WriteCommandSuiteBase extends V1WriteCommandSuiteBase
 class GlutenV1WriteCommandSuite
   extends V1WriteCommandSuite
   with GlutenV1WriteCommandSuiteBase
-  with GlutenSQLTestsBaseTrait {
+  with GlutenSQLTestsBaseTrait
+  with GlutenColumnarWriteTestSupport {
 
   testGluten(
     "SPARK-41914: v1 write with AQE and in-partition sorted - non-string partition column") {
@@ -122,8 +124,7 @@
 
         val executedPlan = FileFormatWriter.executedPlan.get
         val plan = if (enabled) {
-          assert(executedPlan.isInstanceOf[VeloxColumnarWriteFilesExec])
-          executedPlan.asInstanceOf[VeloxColumnarWriteFilesExec].child
+          checkWriteFilesAndGetChild(executedPlan)
         } else {
           executedPlan.transformDown { case a: AdaptiveSparkPlanExec => a.executedPlan }
         }
@@ -204,8 +205,7 @@
 
         val executedPlan = FileFormatWriter.executedPlan.get
         val plan = if (enabled) {
-          assert(executedPlan.isInstanceOf[VeloxColumnarWriteFilesExec])
-          executedPlan.asInstanceOf[VeloxColumnarWriteFilesExec].child
+          checkWriteFilesAndGetChild(executedPlan)
         } else {
           executedPlan.transformDown { case a: AdaptiveSparkPlanExec => a.executedPlan }
         }
diff --git a/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/sources/GlutenInsertSuite.scala b/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/sources/GlutenInsertSuite.scala
index 2814c2e8cba6..468226deed33 100644
--- a/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/sources/GlutenInsertSuite.scala
+++ b/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/sources/GlutenInsertSuite.scala
@@ -16,6 +16,7 @@
  */
 package org.apache.spark.sql.sources
 
+import org.apache.gluten.GlutenColumnarWriteTestSupport
 import org.apache.gluten.execution.SortExecTransformer
 import org.apache.gluten.extension.GlutenPlan
 
@@ -24,7 +25,7 @@ import org.apache.spark.executor.OutputMetrics
 import org.apache.spark.scheduler.{SparkListener, SparkListenerTaskEnd}
 import org.apache.spark.sql._
 import org.apache.spark.sql.catalyst.TableIdentifier
-import org.apache.spark.sql.execution.{CommandResultExec, QueryExecution, VeloxColumnarWriteFilesExec}
+import org.apache.spark.sql.execution.{CommandResultExec, QueryExecution, SparkPlan}
 import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper
 import org.apache.spark.sql.execution.command.DataWritingCommandExec
 import org.apache.spark.sql.execution.metric.SQLMetric
@@ -38,7 +39,8 @@ import java.io.{File, IOException}
 class GlutenInsertSuite
   extends InsertSuite
   with GlutenSQLTestsBaseTrait
-  with AdaptiveSparkPlanHelper {
+  with AdaptiveSparkPlanHelper
+  with GlutenColumnarWriteTestSupport {
 
   override def sparkConf: SparkConf = {
     super.sparkConf.set("spark.sql.leafNodeDefaultParallelism", "1")
@@ -60,13 +62,13 @@ class GlutenInsertSuite
     super.afterAll()
   }
 
-  private def checkAndGetWriteFiles(df: DataFrame): VeloxColumnarWriteFilesExec = {
+  private def checkWriteFilesAndGetChild(df: DataFrame): (SparkPlan, SparkPlan) = {
     val writeFiles = stripAQEPlan(
       df.queryExecution.executedPlan
         .asInstanceOf[CommandResultExec]
         .commandPhysicalPlan).children.head
-    assert(writeFiles.isInstanceOf[VeloxColumnarWriteFilesExec])
-    writeFiles.asInstanceOf[VeloxColumnarWriteFilesExec]
+    val child = checkWriteFilesAndGetChild(writeFiles)
+    (writeFiles, child)
   }
 
   testGluten("insert partition table") {
@@ -97,7 +99,7 @@ class GlutenInsertSuite
      val df =
        spark.sql("INSERT INTO TABLE pt partition(pt='a') SELECT * FROM VALUES(1, 'a'),(2, 'b')")
      spark.sparkContext.listenerBus.waitUntilEmpty()
-      checkAndGetWriteFiles(df)
+      checkWriteFilesAndGetChild(df)
 
      assert(taskMetrics.bytesWritten > 0)
      assert(taskMetrics.recordsWritten == 2)
@@ -135,13 +137,13 @@ class GlutenInsertSuite
   private def validateDynamicPartitionWrite(
       df: DataFrame,
       expectedPartitionNames: Set[String]): Unit = {
-    val writeFiles = checkAndGetWriteFiles(df)
+    val (writeFiles, writeChild) = checkWriteFilesAndGetChild(df)
     assert(
       writeFiles
         .find(_.isInstanceOf[SortExecTransformer])
         .isEmpty)
     // all operators should be transformed
-    assert(writeFiles.child.find(!_.isInstanceOf[GlutenPlan]).isEmpty)
+    assert(writeChild.find(!_.isInstanceOf[GlutenPlan]).isEmpty)
 
     val parts = spark.sessionState.catalog.listPartitionNames(TableIdentifier("pt")).toSet
     assert(parts == expectedPartitionNames)
@@ -209,7 +211,7 @@ class GlutenInsertSuite
      spark.sql("CREATE TABLE t (c1 int, c2 string) USING PARQUET")
      val df =
        spark.sql("INSERT OVERWRITE TABLE t SELECT c1, c2 FROM source SORT BY c1")
-      val writeFiles = checkAndGetWriteFiles(df)
+      val (writeFiles, _) = checkWriteFilesAndGetChild(df)
      assert(writeFiles.find(x => x.isInstanceOf[SortExecTransformer]).isDefined)
      checkAnswer(spark.sql("SELECT * FROM t"), spark.sql("SELECT * FROM source SORT BY c1"))
    }
@@ -244,7 +246,7 @@ class GlutenInsertSuite
      spark.sql("CREATE TABLE t1 USING PARQUET AS SELECT id as c1, id % 3 as c2 FROM range(10)")
      spark.sql("CREATE TABLE t2 (c1 long, c2 long) USING PARQUET")
      val df = spark.sql("INSERT INTO TABLE t2 SELECT c2, count(*) FROM t1 GROUP BY c2")
-      checkAndGetWriteFiles(df)
+      checkWriteFilesAndGetChild(df)
    }
  }
 
@@ -257,7 +259,7 @@ class GlutenInsertSuite
      spark.sql("INSERT INTO TABLE t1 VALUES(1, 1),(2, 2)")
      spark.sql("CREATE TABLE t2 (c1 long, c2 long) USING PARQUET")
      val df = spark.sql("INSERT INTO TABLE t2 SELECT * FROM t1")
-      checkAndGetWriteFiles(df)
+      checkWriteFilesAndGetChild(df)
    }
  }

From 5ca17fca13971d36e3726a8b4ad7675f2be77c81 Mon Sep 17 00:00:00 2001
From: Chang Chen
Date: Mon, 17 Jun 2024 13:31:23 +0800
Subject: [PATCH 2/2] fix warning

---
 .../spark/sql/sources/GlutenInsertSuite.scala | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/sources/GlutenInsertSuite.scala b/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/sources/GlutenInsertSuite.scala
index 468226deed33..084c2faa8c5c 100644
--- a/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/sources/GlutenInsertSuite.scala
+++ b/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/sources/GlutenInsertSuite.scala
@@ -407,7 +407,7 @@ class GlutenInsertSuite
        withTable("t") {
          sql(s"create table t(i boolean) using ${config.dataSource}")
          if (config.useDataFrames) {
-            Seq((false)).toDF.write.insertInto("t")
+            Seq(false).toDF.write.insertInto("t")
          } else {
            sql("insert into t select false")
          }
@@ -422,12 +422,12 @@ class GlutenInsertSuite
    val incompatibleDefault =
      "Failed to execute ALTER TABLE ADD COLUMNS command because the destination " +
        "table column `s` has a DEFAULT value"
-    Seq(Config("parquet"), Config("parquet", true)).foreach {
+    Seq(Config("parquet"), Config("parquet", useDataFrames = true)).foreach {
      config =>
        withTable("t") {
          sql(s"create table t(i boolean) using ${config.dataSource}")
          if (config.useDataFrames) {
-            Seq((false)).toDF.write.insertInto("t")
+            Seq(false).toDF.write.insertInto("t")
          } else {
            sql("insert into t select false")
          }
@@ -454,7 +454,7 @@ class GlutenInsertSuite
        withTable("t") {
          sql(s"create table t(i boolean) using ${config.dataSource}")
          if (config.useDataFrames) {
-            Seq((false)).toDF.write.insertInto("t")
+            Seq(false).toDF.write.insertInto("t")
          } else {
            sql("insert into t select false")
          }
@@ -471,12 +471,12 @@ class GlutenInsertSuite
    val incompatibleDefault =
      "Failed to execute ALTER TABLE ADD COLUMNS command because the destination " +
        "table column `s` has a DEFAULT value"
-    Seq(Config("parquet"), Config("parquet", true)).foreach {
+    Seq(Config("parquet"), Config("parquet", useDataFrames = true)).foreach {
      config =>
        withTable("t") {
          sql(s"create table t(i boolean) using ${config.dataSource}")
          if (config.useDataFrames) {
-            Seq((false)).toDF.write.insertInto("t")
+            Seq(false).toDF.write.insertInto("t")
          } else {
            sql("insert into t select false")
          }
@@ -503,7 +503,7 @@ class GlutenInsertSuite
        withTable("t") {
          sql(s"create table t(i boolean) using ${config.dataSource}")
          if (config.useDataFrames) {
-            Seq((false)).toDF.write.insertInto("t")
+            Seq(false).toDF.write.insertInto("t")
          } else {
            sql("insert into t select false")
          }
@@ -568,12 +568,12 @@ class GlutenInsertSuite
    val incompatibleDefault =
      "Failed to execute ALTER TABLE ADD COLUMNS command because the destination " +
        "table column `s` has a DEFAULT value"
-    Seq(Config("parquet"), Config("parquet", true)).foreach {
+    Seq(Config("parquet"), Config("parquet", useDataFrames = true)).foreach {
      config =>
        withTable("t") {
          sql(s"create table t(i boolean) using ${config.dataSource}")
          if (config.useDataFrames) {
-            Seq((false)).toDF.write.insertInto("t")
+            Seq(false).toDF.write.insertInto("t")
          } else {
            sql("insert into t select false")
          }