[GLUTEN-6980][CORE] In shim poms, use Scala Maven compiler configuration inherited from parent pom (apache#6972)

This could fix build errors in the IntelliJ IDEA IDE when the scala-2.13 profile and one of the spark-3.2/spark-3.3/spark-3.4 profiles are toggled on at the same time.

It also comes with some essential code cleanups.
zhztheplayer authored Aug 22, 2024
1 parent 6b7df81 commit d12bf1f
Showing 38 changed files with 343 additions and 409 deletions.
@@ -97,7 +97,7 @@ object GlutenShuffleUtils {
       endMapIndex: Int,
       startPartition: Int,
       endPartition: Int
-  ): Tuple2[Iterator[(BlockManagerId, collection.Seq[(BlockId, Long, Int)])], Boolean] = {
+  ): Tuple2[Iterator[(BlockManagerId, Seq[(BlockId, Long, Int)])], Boolean] = {
     SparkShimLoader.getSparkShims.getShuffleReaderParam(
       handle,
       startMapIndex,
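A note on the `collection.Seq` → `Seq` cleanups that recur throughout this commit: the bare `Seq` alias means different things under the two Scala versions the build supports. In Scala 2.12, `scala.Seq` is `scala.collection.Seq`; in Scala 2.13 it is `scala.collection.immutable.Seq`. A minimal sketch of the consequence (hypothetical demo code, not part of the patch):

// In Scala 2.12: type Seq[+A] = scala.collection.Seq[A]
// In Scala 2.13: type Seq[+A] = scala.collection.immutable.Seq[A]
import scala.collection.mutable.ArrayBuffer

object SeqAliasDemo {
  // Returning the buffer directly where a plain Seq is declared compiles
  // on 2.12 (Seq is the broad collection.Seq, a supertype of Buffer) but
  // fails on 2.13 (Seq is immutable.Seq); the explicit .toSeq conversion
  // satisfies both compilers.
  def rowCounts(buf: ArrayBuffer[Long]): Seq[Long] = buf.toSeq

  def main(args: Array[String]): Unit =
    println(rowCounts(ArrayBuffer(1L, 2L, 3L)))
}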
@@ -37,7 +37,6 @@ import org.apache.spark.util.Utils
 import org.apache.hadoop.fs.Path
 import org.apache.parquet.filter2.predicate.{FilterApi, FilterPredicate, Operators}
 import org.apache.parquet.filter2.predicate.FilterApi._
-import org.apache.parquet.filter2.predicate.Operators
 import org.apache.parquet.filter2.predicate.Operators.{Column => _, Eq, Gt, GtEq, Lt, LtEq, NotEq}
 import org.apache.parquet.hadoop.{ParquetFileReader, ParquetInputFormat, ParquetOutputFormat}
 import org.apache.parquet.hadoop.util.HadoopInputFile
@@ -49,6 +49,7 @@ class GlutenParquetRowIndexSuite extends ParquetRowIndexSuite with GlutenSQLTest
       .getBlocks
       .asScala
       .map(_.getRowCount)
+      .toSeq
   }

   private def readRowGroupRowCounts(dir: File): Seq[Seq[Long]] = {
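The `.toSeq` added above is the same Scala 2.13 story reached through Java interop: `asScala` wraps a Java list as a `mutable.Buffer`, and `.map` keeps it mutable, so the result stops conforming to `Seq` once `Seq` means `immutable.Seq`. A sketch of the pattern (names are illustrative, not from the suite):

import java.util.{Arrays => JArrays, List => JList}
import scala.collection.JavaConverters._

object ToSeqDemo {
  // javaNames.asScala is a mutable.Buffer; .map leaves it a Buffer.
  // The trailing .toSeq is what lets the declared Seq[Int] result type
  // (= immutable.Seq[Int] on 2.13) check under both Scala versions.
  def lengths(javaNames: JList[String]): Seq[Int] =
    javaNames.asScala.map(_.length).toSeq

  def main(args: Array[String]): Unit =
    println(lengths(JArrays.asList("spark", "gluten")))
}

(`scala.collection.JavaConverters` is used here because the codebase still cross-builds with 2.12; on 2.13 alone one would reach for `scala.jdk.CollectionConverters` instead.)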
pom.xml: 27 additions & 10 deletions
@@ -24,8 +24,8 @@
   <description>Apache Gluten(incubating)</description>

   <organization>
-    <name>Apache</name>
-    <url>http://gluten.apache.org</url>
+    <name>Apache</name>
+    <url>http://gluten.apache.org</url>
   </organization>

   <scm>
@@ -171,6 +171,7 @@
       <version>0.1.2</version>
     </dependency>
   </dependencies> -->
+  <recompileMode>${scala.recompile.mode}</recompileMode>
   <args>
     <arg>-unchecked</arg>
     <arg>-deprecation</arg>
@@ -215,9 +216,24 @@
             <arg>-P:wartremover:traverser:io.github.zhztheplayer.scalawarts.InheritFromCaseClass</arg>
             -->
           </args>
-          <compilerPlugins combine.self="override">
-          </compilerPlugins>
         </configuration>
+        <executions>
+          <execution>
+            <id>scala-compile-first</id>
+            <phase>process-resources</phase>
+            <goals>
+              <goal>add-source</goal>
+              <goal>compile</goal>
+            </goals>
+          </execution>
+          <execution>
+            <id>scala-test-compile-first</id>
+            <phase>process-test-resources</phase>
+            <goals>
+              <goal>testCompile</goal>
+            </goals>
+          </execution>
+        </executions>
       </plugin>
     </plugins>
   </pluginManagement>
@@ -471,15 +487,15 @@
                 <appendAssemblyId>true</appendAssemblyId>
                 <descriptors>
                   <descriptor>
-                    src/assembly/source-assembly.xml
+                    src/assembly/source-assembly.xml
                   </descriptor>
                 </descriptors>
                 <finalName>apache-gluten-${project.version}</finalName>
               </configuration>
             </execution>
           </executions>
         </plugin>
-      </plugins>
+      </plugins>
     </build>
   </profile>
 </profiles>
@@ -768,7 +784,7 @@
             </goals>
             <configuration>
               <classifier>${scala.binary.version}</classifier>
-              <classifier>${sparkbundle.version}</classifier>
+              <classifier>${sparkbundle.version}</classifier>
             </configuration>
           </execution>
         </executions>
@@ -828,6 +844,7 @@
             <arg>-Ywarn-unused:imports</arg>
             <arg>-deprecation</arg>
             <arg>-feature</arg>
+            <arg>-Wconf:cat=deprecation:wv,any:e</arg>
             <arg>-P:wartremover:traverser:io.github.zhztheplayer.scalawarts.InheritFromCaseClass</arg>
           </args>
         </configuration>
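Reading the new argument: `-Wconf` applies its filters first-match-wins, so `cat=deprecation:wv` keeps deprecation diagnostics as (verbose) warnings while the catch-all `any:e` escalates every other warning to a compile error. A small illustration (hypothetical code, not from the repository):

object WconfDemo {
  @deprecated("use newApi instead", since = "0.1")
  def oldApi(): Int = 1

  // Calling a deprecated method stays a warning under
  // -Wconf:cat=deprecation:wv,any:e; any other warning category
  // (e.g. unused imports, given -Ywarn-unused:imports) fails the build.
  def newApi(): Int = oldApi() + 1

  def main(args: Array[String]): Unit = println(newApi())
}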
@@ -945,7 +962,7 @@
         <version>${spotless.version}</version>
         <configuration>
           <java>
-            <toggleOffOn />
+            <toggleOffOn/>
             <googleJavaFormat>
               <version>1.7</version>
             </googleJavaFormat>
@@ -955,15 +972,15 @@
               <order>org.apache.gluten,io.substrait.spark,,javax,java,scala,\#</order>
             </importOrder>

-            <removeUnusedImports />
+            <removeUnusedImports/>
             <licenseHeader>
               <content>${spotless.license.header}</content>
               <delimiter>${spotless.delimiter}</delimiter>
             </licenseHeader>
           </java>
           <scala>
             <!-- make it works `// spotless:off ` -->
-            <toggleOffOn />
+            <toggleOffOn/>
             <scalafmt>
               <version>${spotless.scalafmt.version}</version>
               <scalaMajorVersion>${scala.binary.version}</scalaMajorVersion>
shims/common/pom.xml: 0 additions & 8 deletions
@@ -58,14 +58,6 @@
       <plugin>
         <groupId>net.alchim31.maven</groupId>
         <artifactId>scala-maven-plugin</artifactId>
-        <configuration>
-          <args>
-            <arg>-Wconf:cat=deprecation:silent</arg>
-            <!--
-            <arg>-P:wartremover:traverser:io.github.zhztheplayer.scalawarts.InheritFromCaseClass</arg>
-            -->
-          </args>
-        </configuration>
       </plugin>
       <plugin>
         <groupId>org.apache.maven.plugins</groupId>
@@ -16,22 +16,6 @@
  */
 package org.apache.gluten.execution.datasource

-import org.apache.gluten.GlutenConfig
-
-import org.apache.spark.rdd.RDD
-import org.apache.spark.sql.SparkSession
-import org.apache.spark.sql.catalyst.InternalRow
-import org.apache.spark.sql.execution.SparkPlan
-import org.apache.spark.sql.execution.datasources.{BlockStripes, FakeRow, OutputWriter}
-import org.apache.spark.sql.internal.SQLConf
-import org.apache.spark.sql.types.StructType
-
-import org.apache.hadoop.fs.FileStatus
-import org.apache.hadoop.mapreduce.TaskAttemptContext
-
-import scala.collection.JavaConverters.mapAsJavaMapConverter
-import scala.collection.mutable
-
 object GlutenParquetWriterInjects {
   private var INSTANCE: GlutenFormatWriterInjects = _
@@ -179,8 +179,7 @@ trait SparkShims {
       startMapIndex: Int,
       endMapIndex: Int,
       startPartition: Int,
-      endPartition: Int)
-      : Tuple2[Iterator[(BlockManagerId, collection.Seq[(BlockId, Long, Int)])], Boolean]
+      endPartition: Int): Tuple2[Iterator[(BlockManagerId, Seq[(BlockId, Long, Int)])], Boolean]

   // Compatible with Spark-3.5 and later
   def getShuffleAdvisoryPartitionSize(shuffle: ShuffleExchangeLike): Option[Long] = None
shims/spark32/pom.xml: 0 additions & 6 deletions
@@ -105,12 +105,6 @@
       <plugin>
         <groupId>net.alchim31.maven</groupId>
         <artifactId>scala-maven-plugin</artifactId>
-        <configuration>
-          <args>
-            <arg>-Wconf:cat=deprecation:silent</arg>
-            <arg>-P:wartremover:traverser:io.github.zhztheplayer.scalawarts.InheritFromCaseClass</arg>
-          </args>
-        </configuration>
       </plugin>
       <plugin>
         <groupId>org.scalatest</groupId>
@@ -181,8 +181,7 @@ class Spark32Shims extends SparkShims {
       startMapIndex: Int,
       endMapIndex: Int,
       startPartition: Int,
-      endPartition: Int)
-      : Tuple2[Iterator[(BlockManagerId, collection.Seq[(BlockId, Long, Int)])], Boolean] = {
+      endPartition: Int): Tuple2[Iterator[(BlockManagerId, Seq[(BlockId, Long, Int)])], Boolean] = {
     ShuffleUtils.getReaderParam(handle, startMapIndex, endMapIndex, startPartition, endPartition)
   }
@@ -25,8 +25,7 @@ object ShuffleUtils {
       startMapIndex: Int,
       endMapIndex: Int,
       startPartition: Int,
-      endPartition: Int)
-      : Tuple2[Iterator[(BlockManagerId, collection.Seq[(BlockId, Long, Int)])], Boolean] = {
+      endPartition: Int): Tuple2[Iterator[(BlockManagerId, Seq[(BlockId, Long, Int)])], Boolean] = {
     val address = SparkEnv.get.mapOutputTracker.getMapSizesByExecutorId(
       handle.shuffleId,
       startMapIndex,
@@ -17,7 +17,6 @@
 package org.apache.spark.sql.execution

 import org.apache.spark.sql.catalyst.expressions.Expression
-import org.apache.spark.sql.catalyst.plans.{InnerLike, JoinType}
 import org.apache.spark.sql.catalyst.plans.physical.{HashPartitioning, Partitioning, PartitioningCollection}

 import scala.collection.mutable
@@ -20,7 +20,7 @@ import org.apache.gluten.metrics.GlutenTimeMetric

 import org.apache.spark.rdd.RDD
 import org.apache.spark.sql.catalyst.{InternalRow, TableIdentifier}
-import org.apache.spark.sql.catalyst.expressions.{And, Attribute, AttributeReference, BoundReference, DynamicPruningExpression, Expression, PlanExpression, Predicate}
+import org.apache.spark.sql.catalyst.expressions.{And, Attribute, AttributeReference, BoundReference, Expression, PlanExpression, Predicate}
 import org.apache.spark.sql.execution.datasources.{HadoopFsRelation, PartitionDirectory}
 import org.apache.spark.sql.execution.metric.{SQLMetric, SQLMetrics}
 import org.apache.spark.sql.types.StructType
@@ -16,8 +16,7 @@
  */
 package org.apache.spark.sql.execution.datasources

-import org.apache.spark.{SparkException, TaskContext}
-import org.apache.spark.internal.io.{FileCommitProtocol, SparkHadoopWriterUtils}
+import org.apache.spark.internal.io.FileCommitProtocol
 import org.apache.spark.rdd.RDD
 import org.apache.spark.sql.catalyst.InternalRow
 import org.apache.spark.sql.catalyst.catalog.BucketSpec
@@ -28,8 +27,6 @@ import org.apache.spark.sql.connector.write.WriterCommitMessage
 import org.apache.spark.sql.execution.{SparkPlan, UnaryExecNode}
 import org.apache.spark.sql.execution.datasources.FileFormatWriter.ConcurrentOutputWriterSpec

-import java.util.Date
-
 /**
  * This class is copied from Spark 3.4 and modified for Gluten. Spark 3.4 introduced a new operator,
  * WriteFiles. In order to support the WriteTransformer in Spark 3.4, we need to copy the WriteFiles
@@ -16,7 +16,6 @@
  */
 package org.apache.spark.sql.execution.datasources.orc

-import org.apache.gluten.GlutenConfig
 import org.apache.gluten.execution.datasource.GlutenOrcWriterInjects

 import org.apache.spark.TaskContext
@@ -16,7 +16,6 @@
  */
 package org.apache.spark.sql.execution.datasources.parquet

-import org.apache.gluten.GlutenConfig
 import org.apache.gluten.execution.datasource.GlutenParquetWriterInjects

 import org.apache.spark.TaskContext
@@ -39,6 +39,6 @@ object CatalogUtil {
       throw new UnsupportedOperationException(s"Partitioning by expressions")
     }

-    (identityCols, bucketSpec)
+    (identityCols.toSeq, bucketSpec)
   }
 }
