[GLUTEN-6980][CORE] In shim poms, use Scala Maven compiler configuration inherited from parent pom (apache#6972)

This could fix build errors in the IntelliJ IDEA IDE when the scala-2.13 profile and one of the spark-3.2/spark-3.3/spark-3.4 profiles are toggled on at the same time.

It also comes with some essential code cleanups.
zhztheplayer authored Aug 22, 2024
1 parent 6b7df81 commit d12bf1f
Showing 38 changed files with 343 additions and 409 deletions.
@@ -97,7 +97,7 @@ object GlutenShuffleUtils {
       endMapIndex: Int,
       startPartition: Int,
       endPartition: Int
-  ): Tuple2[Iterator[(BlockManagerId, collection.Seq[(BlockId, Long, Int)])], Boolean] = {
+  ): Tuple2[Iterator[(BlockManagerId, Seq[(BlockId, Long, Int)])], Boolean] = {
     SparkShimLoader.getSparkShims.getShuffleReaderParam(
       handle,
       startMapIndex,
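A note on the `collection.Seq` → `Seq` cleanups that recur throughout this commit: the bare `Seq` alias means different things under the two Scala versions the build supports. In Scala 2.12, `scala.Seq` is `scala.collection.Seq`; in Scala 2.13 it is `scala.collection.immutable.Seq`. A minimal sketch of the consequence (hypothetical demo code, not part of the patch):

// In Scala 2.12: type Seq[+A] = scala.collection.Seq[A]
// In Scala 2.13: type Seq[+A] = scala.collection.immutable.Seq[A]
import scala.collection.mutable.ArrayBuffer

object SeqAliasDemo {
  // Returning the buffer directly where a plain Seq is declared compiles
  // on 2.12 (Seq is the broad collection.Seq, a supertype of Buffer) but
  // fails on 2.13 (Seq is immutable.Seq); the explicit .toSeq conversion
  // satisfies both compilers.
  def rowCounts(buf: ArrayBuffer[Long]): Seq[Long] = buf.toSeq

  def main(args: Array[String]): Unit =
    println(rowCounts(ArrayBuffer(1L, 2L, 3L)))
}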
@@ -37,7 +37,6 @@ import org.apache.spark.util.Utils
 import org.apache.hadoop.fs.Path
 import org.apache.parquet.filter2.predicate.{FilterApi, FilterPredicate, Operators}
 import org.apache.parquet.filter2.predicate.FilterApi._
-import org.apache.parquet.filter2.predicate.Operators
 import org.apache.parquet.filter2.predicate.Operators.{Column => _, Eq, Gt, GtEq, Lt, LtEq, NotEq}
 import org.apache.parquet.hadoop.{ParquetFileReader, ParquetInputFormat, ParquetOutputFormat}
 import org.apache.parquet.hadoop.util.HadoopInputFile
@@ -49,6 +49,7 @@ class GlutenParquetRowIndexSuite extends ParquetRowIndexSuite with GlutenSQLTest
       .getBlocks
       .asScala
       .map(_.getRowCount)
+      .toSeq
   }

   private def readRowGroupRowCounts(dir: File): Seq[Seq[Long]] = {
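The `.toSeq` added above is the same Scala 2.13 story reached through Java interop: `asScala` wraps a Java list as a `mutable.Buffer`, and `.map` keeps it mutable, so the result stops conforming to `Seq` once `Seq` means `immutable.Seq`. A sketch of the pattern (names are illustrative, not from the suite):

import java.util.{Arrays => JArrays, List => JList}
import scala.collection.JavaConverters._

object ToSeqDemo {
  // javaNames.asScala is a mutable.Buffer; .map leaves it a Buffer.
  // The trailing .toSeq is what lets the declared Seq[Int] result type
  // (= immutable.Seq[Int] on 2.13) check under both Scala versions.
  def lengths(javaNames: JList[String]): Seq[Int] =
    javaNames.asScala.map(_.length).toSeq

  def main(args: Array[String]): Unit =
    println(lengths(JArrays.asList("spark", "gluten")))
}

(`scala.collection.JavaConverters` is used here because the codebase still cross-builds with 2.12; on 2.13 alone one would reach for `scala.jdk.CollectionConverters` instead.)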
pom.xml: 27 additions & 10 deletions
@@ -24,8 +24,8 @@
   <description>Apache Gluten(incubating)</description>

   <organization>
-    <name>Apache</name>
-    <url>http://gluten.apache.org</url>
+    <name>Apache</name>
+    <url>http://gluten.apache.org</url>
   </organization>

   <scm>
@@ -171,6 +171,7 @@
       <version>0.1.2</version>
     </dependency>
   </dependencies> -->
+  <recompileMode>${scala.recompile.mode}</recompileMode>
   <args>
     <arg>-unchecked</arg>
     <arg>-deprecation</arg>
@@ -215,9 +216,24 @@
             <arg>-P:wartremover:traverser:io.github.zhztheplayer.scalawarts.InheritFromCaseClass</arg>
             -->
           </args>
-          <compilerPlugins combine.self="override">
-          </compilerPlugins>
         </configuration>
+        <executions>
+          <execution>
+            <id>scala-compile-first</id>
+            <phase>process-resources</phase>
+            <goals>
+              <goal>add-source</goal>
+              <goal>compile</goal>
+            </goals>
+          </execution>
+          <execution>
+            <id>scala-test-compile-first</id>
+            <phase>process-test-resources</phase>
+            <goals>
+              <goal>testCompile</goal>
+            </goals>
+          </execution>
+        </executions>
       </plugin>
     </plugins>
   </pluginManagement>
@@ -471,15 +487,15 @@
                 <appendAssemblyId>true</appendAssemblyId>
                 <descriptors>
                   <descriptor>
-                    src/assembly/source-assembly.xml
+                    src/assembly/source-assembly.xml
                   </descriptor>
                 </descriptors>
                 <finalName>apache-gluten-${project.version}</finalName>
               </configuration>
             </execution>
           </executions>
         </plugin>
-      </plugins>
+      </plugins>
     </build>
   </profile>
 </profiles>
@@ -768,7 +784,7 @@
             </goals>
             <configuration>
               <classifier>${scala.binary.version}</classifier>
-              <classifier>${sparkbundle.version}</classifier>
+              <classifier>${sparkbundle.version}</classifier>
             </configuration>
           </execution>
         </executions>
@@ -828,6 +844,7 @@
             <arg>-Ywarn-unused:imports</arg>
             <arg>-deprecation</arg>
             <arg>-feature</arg>
+            <arg>-Wconf:cat=deprecation:wv,any:e</arg>
             <arg>-P:wartremover:traverser:io.github.zhztheplayer.scalawarts.InheritFromCaseClass</arg>
           </args>
         </configuration>
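Reading the new argument: `-Wconf` applies its filters first-match-wins, so `cat=deprecation:wv` keeps deprecation diagnostics as (verbose) warnings while the catch-all `any:e` escalates every other warning to a compile error. A small illustration (hypothetical code, not from the repository):

object WconfDemo {
  @deprecated("use newApi instead", since = "0.1")
  def oldApi(): Int = 1

  // Calling a deprecated method stays a warning under
  // -Wconf:cat=deprecation:wv,any:e; any other warning category
  // (e.g. unused imports, given -Ywarn-unused:imports) fails the build.
  def newApi(): Int = oldApi() + 1

  def main(args: Array[String]): Unit = println(newApi())
}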
@@ -945,7 +962,7 @@
         <version>${spotless.version}</version>
         <configuration>
           <java>
-            <toggleOffOn />
+            <toggleOffOn/>
             <googleJavaFormat>
               <version>1.7</version>
             </googleJavaFormat>
@@ -955,15 +972,15 @@
               <order>org.apache.gluten,io.substrait.spark,,javax,java,scala,\#</order>
             </importOrder>

-            <removeUnusedImports />
+            <removeUnusedImports/>
             <licenseHeader>
               <content>${spotless.license.header}</content>
               <delimiter>${spotless.delimiter}</delimiter>
             </licenseHeader>
           </java>
           <scala>
             <!-- make it works `// spotless:off ` -->
-            <toggleOffOn />
+            <toggleOffOn/>
             <scalafmt>
               <version>${spotless.scalafmt.version}</version>
               <scalaMajorVersion>${scala.binary.version}</scalaMajorVersion>
shims/common/pom.xml: 0 additions & 8 deletions
@@ -58,14 +58,6 @@
       <plugin>
         <groupId>net.alchim31.maven</groupId>
         <artifactId>scala-maven-plugin</artifactId>
-        <configuration>
-          <args>
-            <arg>-Wconf:cat=deprecation:silent</arg>
-            <!--
-            <arg>-P:wartremover:traverser:io.github.zhztheplayer.scalawarts.InheritFromCaseClass</arg>
-            -->
-          </args>
-        </configuration>
       </plugin>
       <plugin>
         <groupId>org.apache.maven.plugins</groupId>
@@ -16,22 +16,6 @@
  */
 package org.apache.gluten.execution.datasource

-import org.apache.gluten.GlutenConfig
-
-import org.apache.spark.rdd.RDD
-import org.apache.spark.sql.SparkSession
-import org.apache.spark.sql.catalyst.InternalRow
-import org.apache.spark.sql.execution.SparkPlan
-import org.apache.spark.sql.execution.datasources.{BlockStripes, FakeRow, OutputWriter}
-import org.apache.spark.sql.internal.SQLConf
-import org.apache.spark.sql.types.StructType
-
-import org.apache.hadoop.fs.FileStatus
-import org.apache.hadoop.mapreduce.TaskAttemptContext
-
-import scala.collection.JavaConverters.mapAsJavaMapConverter
-import scala.collection.mutable
-
 object GlutenParquetWriterInjects {
   private var INSTANCE: GlutenFormatWriterInjects = _
@@ -179,8 +179,7 @@ trait SparkShims {
       startMapIndex: Int,
       endMapIndex: Int,
       startPartition: Int,
-      endPartition: Int)
-      : Tuple2[Iterator[(BlockManagerId, collection.Seq[(BlockId, Long, Int)])], Boolean]
+      endPartition: Int): Tuple2[Iterator[(BlockManagerId, Seq[(BlockId, Long, Int)])], Boolean]

   // Compatible with Spark-3.5 and later
   def getShuffleAdvisoryPartitionSize(shuffle: ShuffleExchangeLike): Option[Long] = None
shims/spark32/pom.xml: 0 additions & 6 deletions
@@ -105,12 +105,6 @@
       <plugin>
         <groupId>net.alchim31.maven</groupId>
         <artifactId>scala-maven-plugin</artifactId>
-        <configuration>
-          <args>
-            <arg>-Wconf:cat=deprecation:silent</arg>
-            <arg>-P:wartremover:traverser:io.github.zhztheplayer.scalawarts.InheritFromCaseClass</arg>
-          </args>
-        </configuration>
       </plugin>
       <plugin>
         <groupId>org.scalatest</groupId>
@@ -181,8 +181,7 @@ class Spark32Shims extends SparkShims {
       startMapIndex: Int,
       endMapIndex: Int,
       startPartition: Int,
-      endPartition: Int)
-      : Tuple2[Iterator[(BlockManagerId, collection.Seq[(BlockId, Long, Int)])], Boolean] = {
+      endPartition: Int): Tuple2[Iterator[(BlockManagerId, Seq[(BlockId, Long, Int)])], Boolean] = {
     ShuffleUtils.getReaderParam(handle, startMapIndex, endMapIndex, startPartition, endPartition)
   }
@@ -25,8 +25,7 @@ object ShuffleUtils {
       startMapIndex: Int,
       endMapIndex: Int,
       startPartition: Int,
-      endPartition: Int)
-      : Tuple2[Iterator[(BlockManagerId, collection.Seq[(BlockId, Long, Int)])], Boolean] = {
+      endPartition: Int): Tuple2[Iterator[(BlockManagerId, Seq[(BlockId, Long, Int)])], Boolean] = {
     val address = SparkEnv.get.mapOutputTracker.getMapSizesByExecutorId(
       handle.shuffleId,
       startMapIndex,
@@ -17,7 +17,6 @@
 package org.apache.spark.sql.execution

 import org.apache.spark.sql.catalyst.expressions.Expression
-import org.apache.spark.sql.catalyst.plans.{InnerLike, JoinType}
 import org.apache.spark.sql.catalyst.plans.physical.{HashPartitioning, Partitioning, PartitioningCollection}

 import scala.collection.mutable
@@ -20,7 +20,7 @@ import org.apache.gluten.metrics.GlutenTimeMetric

 import org.apache.spark.rdd.RDD
 import org.apache.spark.sql.catalyst.{InternalRow, TableIdentifier}
-import org.apache.spark.sql.catalyst.expressions.{And, Attribute, AttributeReference, BoundReference, DynamicPruningExpression, Expression, PlanExpression, Predicate}
+import org.apache.spark.sql.catalyst.expressions.{And, Attribute, AttributeReference, BoundReference, Expression, PlanExpression, Predicate}
 import org.apache.spark.sql.execution.datasources.{HadoopFsRelation, PartitionDirectory}
 import org.apache.spark.sql.execution.metric.{SQLMetric, SQLMetrics}
 import org.apache.spark.sql.types.StructType
@@ -16,8 +16,7 @@
  */
 package org.apache.spark.sql.execution.datasources

-import org.apache.spark.{SparkException, TaskContext}
-import org.apache.spark.internal.io.{FileCommitProtocol, SparkHadoopWriterUtils}
+import org.apache.spark.internal.io.FileCommitProtocol
 import org.apache.spark.rdd.RDD
 import org.apache.spark.sql.catalyst.InternalRow
 import org.apache.spark.sql.catalyst.catalog.BucketSpec
@@ -28,8 +27,6 @@ import org.apache.spark.sql.connector.write.WriterCommitMessage
 import org.apache.spark.sql.execution.{SparkPlan, UnaryExecNode}
 import org.apache.spark.sql.execution.datasources.FileFormatWriter.ConcurrentOutputWriterSpec

-import java.util.Date
-
 /**
  * This class is copied from Spark 3.4 and modified for Gluten. Spark 3.4 introduced a new operator,
  * WriteFiles. In order to support the WriteTransformer in Spark 3.4, we need to copy the WriteFiles
@@ -16,7 +16,6 @@
  */
 package org.apache.spark.sql.execution.datasources.orc

-import org.apache.gluten.GlutenConfig
 import org.apache.gluten.execution.datasource.GlutenOrcWriterInjects

 import org.apache.spark.TaskContext
@@ -16,7 +16,6 @@
  */
 package org.apache.spark.sql.execution.datasources.parquet

-import org.apache.gluten.GlutenConfig
 import org.apache.gluten.execution.datasource.GlutenParquetWriterInjects

 import org.apache.spark.TaskContext
@@ -39,6 +39,6 @@ object CatalogUtil {
       throw new UnsupportedOperationException(s"Partitioning by expressions")
     }

-    (identityCols, bucketSpec)
+    (identityCols.toSeq, bucketSpec)
   }
 }
