Commit 82fb53a: fixup

zhztheplayer committed Sep 10, 2024 · 1 parent 3cd9ffc

Showing 4 changed files with 160 additions and 25 deletions.
File 1 of 4
@@ -16,6 +16,8 @@
*/
package org.apache.spark.sql.execution.datasources.parquet

import org.apache.gluten.GlutenConfig

import org.apache.spark.SparkConf
import org.apache.spark.sql._
import org.apache.spark.sql.catalyst.dsl.expressions._
@@ -35,12 +37,12 @@ import org.apache.spark.tags.ExtendedSQLTest
import org.apache.spark.util.Utils

import org.apache.hadoop.fs.Path
import org.apache.parquet.filter2.predicate.{FilterApi, FilterPredicate}
import org.apache.parquet.filter2.predicate.{FilterApi, FilterPredicate, Operators}
import org.apache.parquet.filter2.predicate.FilterApi._
import org.apache.parquet.filter2.predicate.Operators
import org.apache.parquet.filter2.predicate.Operators.{Column => _, _}
import org.apache.parquet.hadoop.{ParquetFileReader, ParquetInputFormat, ParquetOutputFormat}
import org.apache.parquet.hadoop.util.HadoopInputFile
import org.scalatest.Ignore

import java.sql.{Date, Timestamp}
import java.time.LocalDate
@@ -357,7 +359,38 @@ abstract class GltuenParquetFilterSuite extends ParquetFilterSuite with GlutenSQ
}

@ExtendedSQLTest
class GlutenParquetV1FilterSuite extends GltuenParquetFilterSuite with GlutenSQLTestsBaseTrait {
class GlutenParquetV1FilterSuiteIOThreadsOn extends GlutenParquetV1FilterSuite {
  override def sparkConf: SparkConf =
    super.sparkConf
      .set(GlutenConfig.COLUMNAR_VELOX_CONNECTOR_IO_THREADS.key, "2")
}

@ExtendedSQLTest
@Ignore // https://github.com/apache/incubator-gluten/issues/7174
class GlutenParquetV1FilterSuiteIOThreadsOff extends GlutenParquetV1FilterSuite {
  override def sparkConf: SparkConf =
    super.sparkConf
      .set(GlutenConfig.COLUMNAR_VELOX_CONNECTOR_IO_THREADS.key, "0")
}

@ExtendedSQLTest
class GlutenParquetV2FilterSuiteIOThreadsOn extends GlutenParquetV2FilterSuite {
  override def sparkConf: SparkConf =
    super.sparkConf
      .set(GlutenConfig.COLUMNAR_VELOX_CONNECTOR_IO_THREADS.key, "2")
}

@ExtendedSQLTest
@Ignore // https://github.com/apache/incubator-gluten/issues/7174
class GlutenParquetV2FilterSuiteIOThreadsOff extends GlutenParquetV2FilterSuite {
  override def sparkConf: SparkConf =
    super.sparkConf
      .set(GlutenConfig.COLUMNAR_VELOX_CONNECTOR_IO_THREADS.key, "0")
}

abstract class GlutenParquetV1FilterSuite
  extends GltuenParquetFilterSuite
  with GlutenSQLTestsBaseTrait {
  // TODO: enable Parquet V2 write path after file source V2 writers are workable.
  override def sparkConf: SparkConf =
    super.sparkConf
@@ -444,8 +477,9 @@ class GlutenParquetV1FilterSuite extends GltuenParquetFilterSuite with GlutenSQL
}
}

@ExtendedSQLTest
class GlutenParquetV2FilterSuite extends GltuenParquetFilterSuite with GlutenSQLTestsBaseTrait {
abstract class GlutenParquetV2FilterSuite
  extends GltuenParquetFilterSuite
  with GlutenSQLTestsBaseTrait {
  // TODO: enable Parquet V2 write path after file source V2 writers are workable.
  override def sparkConf: SparkConf =
    super.sparkConf
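
All four changed files apply the same pattern to the per-shim copies of this suite: GlutenParquetV1FilterSuite and GlutenParquetV2FilterSuite become abstract bases, and thin concrete subclasses pin GlutenConfig.COLUMNAR_VELOX_CONNECTOR_IO_THREADS to "2" (threads on) or "0" (threads off), with the off variants @Ignore'd pending https://github.com/apache/incubator-gluten/issues/7174. A minimal sketch of the pattern, using illustrative suite names rather than the real classes and assuming GlutenConfig is on the classpath:

import org.apache.gluten.GlutenConfig
import org.apache.spark.SparkConf

// Shared tests live in the abstract base; subclasses only vary the conf.
abstract class ExampleFilterSuiteBase {
  // The real suites override sparkConf from Spark's test harness; a plain
  // SparkConf stands in for it in this sketch.
  def sparkConf: SparkConf = new SparkConf()
}

// Same tests, Velox connector IO threads enabled.
class ExampleFilterSuiteIOThreadsOn extends ExampleFilterSuiteBase {
  override def sparkConf: SparkConf =
    super.sparkConf
      .set(GlutenConfig.COLUMNAR_VELOX_CONNECTOR_IO_THREADS.key, "2")
}

// Same tests, Velox connector IO threads disabled.
class ExampleFilterSuiteIOThreadsOff extends ExampleFilterSuiteBase {
  override def sparkConf: SparkConf =
    super.sparkConf
      .set(GlutenConfig.COLUMNAR_VELOX_CONNECTOR_IO_THREADS.key, "0")
}

Each concrete suite inherits every test from its base, so the one refactor runs the full filter-pushdown suite under both IO-thread settings.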
File 2 of 4
@@ -16,6 +16,8 @@
*/
package org.apache.spark.sql.execution.datasources.parquet

import org.apache.gluten.GlutenConfig

import org.apache.spark.SparkConf
import org.apache.spark.sql._
import org.apache.spark.sql.catalyst.dsl.expressions._
@@ -35,12 +37,12 @@ import org.apache.spark.tags.ExtendedSQLTest
import org.apache.spark.util.Utils

import org.apache.hadoop.fs.Path
import org.apache.parquet.filter2.predicate.{FilterApi, FilterPredicate}
import org.apache.parquet.filter2.predicate.{FilterApi, FilterPredicate, Operators}
import org.apache.parquet.filter2.predicate.FilterApi._
import org.apache.parquet.filter2.predicate.Operators
import org.apache.parquet.filter2.predicate.Operators.{Column => _, Eq, Gt, GtEq, Lt, LtEq, NotEq}
import org.apache.parquet.hadoop.{ParquetFileReader, ParquetInputFormat, ParquetOutputFormat}
import org.apache.parquet.hadoop.util.HadoopInputFile
import org.scalatest.Ignore

import java.sql.{Date, Timestamp}
import java.time.LocalDate
@@ -328,7 +330,38 @@ abstract class GltuenParquetFilterSuite extends ParquetFilterSuite with GlutenSQ
}

@ExtendedSQLTest
class GlutenParquetV1FilterSuite extends GltuenParquetFilterSuite with GlutenSQLTestsBaseTrait {
class GlutenParquetV1FilterSuiteIOThreadsOn extends GlutenParquetV1FilterSuite {
  override def sparkConf: SparkConf =
    super.sparkConf
      .set(GlutenConfig.COLUMNAR_VELOX_CONNECTOR_IO_THREADS.key, "2")
}

@ExtendedSQLTest
@Ignore // https://github.com/apache/incubator-gluten/issues/7174
class GlutenParquetV1FilterSuiteIOThreadsOff extends GlutenParquetV1FilterSuite {
  override def sparkConf: SparkConf =
    super.sparkConf
      .set(GlutenConfig.COLUMNAR_VELOX_CONNECTOR_IO_THREADS.key, "0")
}

@ExtendedSQLTest
class GlutenParquetV2FilterSuiteIOThreadsOn extends GlutenParquetV2FilterSuite {
  override def sparkConf: SparkConf =
    super.sparkConf
      .set(GlutenConfig.COLUMNAR_VELOX_CONNECTOR_IO_THREADS.key, "2")
}

@ExtendedSQLTest
@Ignore // https://github.com/apache/incubator-gluten/issues/7174
class GlutenParquetV2FilterSuiteIOThreadsOff extends GlutenParquetV2FilterSuite {
  override def sparkConf: SparkConf =
    super.sparkConf
      .set(GlutenConfig.COLUMNAR_VELOX_CONNECTOR_IO_THREADS.key, "0")
}

abstract class GlutenParquetV1FilterSuite
  extends GltuenParquetFilterSuite
  with GlutenSQLTestsBaseTrait {
  // TODO: enable Parquet V2 write path after file source V2 writers are workable.
  override def sparkConf: SparkConf =
    super.sparkConf
@@ -415,8 +448,9 @@ class GlutenParquetV1FilterSuite extends GltuenParquetFilterSuite with GlutenSQL
}
}

@ExtendedSQLTest
class GlutenParquetV2FilterSuite extends GltuenParquetFilterSuite with GlutenSQLTestsBaseTrait {
abstract class GlutenParquetV2FilterSuite
  extends GltuenParquetFilterSuite
  with GlutenSQLTestsBaseTrait {
  // TODO: enable Parquet V2 write path after file source V2 writers are workable.
  override def sparkConf: SparkConf =
    super.sparkConf
File 3 of 4
@@ -16,6 +16,8 @@
*/
package org.apache.spark.sql.execution.datasources.parquet

import org.apache.gluten.GlutenConfig

import org.apache.spark.SparkConf
import org.apache.spark.sql._
import org.apache.spark.sql.catalyst.dsl.expressions._
@@ -40,6 +42,7 @@ import org.apache.parquet.filter2.predicate.FilterApi._
import org.apache.parquet.filter2.predicate.Operators.{Column => _, Eq, Gt, GtEq, Lt, LtEq, NotEq}
import org.apache.parquet.hadoop.{ParquetFileReader, ParquetInputFormat, ParquetOutputFormat}
import org.apache.parquet.hadoop.util.HadoopInputFile
import org.scalatest.Ignore

import java.sql.{Date, Timestamp}
import java.time.LocalDate
@@ -328,7 +331,38 @@ abstract class GltuenParquetFilterSuite extends ParquetFilterSuite with GlutenSQ
}

@ExtendedSQLTest
class GlutenParquetV1FilterSuite extends GltuenParquetFilterSuite with GlutenSQLTestsBaseTrait {
class GlutenParquetV1FilterSuiteIOThreadsOn extends GlutenParquetV1FilterSuite {
  override def sparkConf: SparkConf =
    super.sparkConf
      .set(GlutenConfig.COLUMNAR_VELOX_CONNECTOR_IO_THREADS.key, "2")
}

@ExtendedSQLTest
@Ignore // https://github.com/apache/incubator-gluten/issues/7174
class GlutenParquetV1FilterSuiteIOThreadsOff extends GlutenParquetV1FilterSuite {
  override def sparkConf: SparkConf =
    super.sparkConf
      .set(GlutenConfig.COLUMNAR_VELOX_CONNECTOR_IO_THREADS.key, "0")
}

@ExtendedSQLTest
class GlutenParquetV2FilterSuiteIOThreadsOn extends GlutenParquetV2FilterSuite {
  override def sparkConf: SparkConf =
    super.sparkConf
      .set(GlutenConfig.COLUMNAR_VELOX_CONNECTOR_IO_THREADS.key, "2")
}

@ExtendedSQLTest
@Ignore // https://github.com/apache/incubator-gluten/issues/7174
class GlutenParquetV2FilterSuiteIOThreadsOff extends GlutenParquetV2FilterSuite {
  override def sparkConf: SparkConf =
    super.sparkConf
      .set(GlutenConfig.COLUMNAR_VELOX_CONNECTOR_IO_THREADS.key, "0")
}

abstract class GlutenParquetV1FilterSuite
  extends GltuenParquetFilterSuite
  with GlutenSQLTestsBaseTrait {
  // TODO: enable Parquet V2 write path after file source V2 writers are workable.
  override def sparkConf: SparkConf =
    super.sparkConf
@@ -415,8 +449,9 @@ class GlutenParquetV1FilterSuite extends GltuenParquetFilterSuite with GlutenSQL
}
}

@ExtendedSQLTest
class GlutenParquetV2FilterSuite extends GltuenParquetFilterSuite with GlutenSQLTestsBaseTrait {
abstract class GlutenParquetV2FilterSuite
  extends GltuenParquetFilterSuite
  with GlutenSQLTestsBaseTrait {
  // TODO: enable Parquet V2 write path after file source V2 writers are workable.
  override def sparkConf: SparkConf =
    super.sparkConf
File 4 of 4
@@ -16,34 +16,34 @@
*/
package org.apache.spark.sql.execution.datasources.parquet

import org.apache.gluten.GlutenConfig
import org.apache.hadoop.fs.Path
import org.apache.parquet.filter2.predicate.FilterApi._
import org.apache.parquet.filter2.predicate.Operators.{Eq, Gt, GtEq, Lt, LtEq, NotEq, Column => _}
import org.apache.parquet.filter2.predicate.{FilterApi, FilterPredicate, Operators}
import org.apache.parquet.hadoop.util.HadoopInputFile
import org.apache.parquet.hadoop.{ParquetFileReader, ParquetInputFormat, ParquetOutputFormat}
import org.apache.spark.SparkConf
import org.apache.spark.sql._
import org.apache.spark.sql.catalyst.dsl.expressions._
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.optimizer.InferFiltersFromConstraints
import org.apache.spark.sql.catalyst.planning.PhysicalOperation
import org.apache.spark.sql.connector.catalog.CatalogV2Implicits.parseColumnPath
import org.apache.spark.sql.execution.datasources.{DataSourceStrategy, HadoopFsRelation, LogicalRelation, PushableColumnAndNestedColumn}
import org.apache.spark.sql.execution.datasources.v2.DataSourceV2ScanRelation
import org.apache.spark.sql.execution.datasources.v2.parquet.ParquetScan
import org.apache.spark.sql.execution.datasources.{DataSourceStrategy, HadoopFsRelation, LogicalRelation, PushableColumnAndNestedColumn}
import org.apache.spark.sql.functions._
import org.apache.spark.sql.internal.LegacyBehaviorPolicy.{CORRECTED, LEGACY}
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.internal.SQLConf.ParquetOutputTimestampType.INT96
import org.apache.spark.sql.types._
import org.apache.spark.tags.ExtendedSQLTest
import org.apache.spark.util.Utils

import org.apache.hadoop.fs.Path
import org.apache.parquet.filter2.predicate.{FilterApi, FilterPredicate, Operators}
import org.apache.parquet.filter2.predicate.FilterApi._
import org.apache.parquet.filter2.predicate.Operators.{Column => _, Eq, Gt, GtEq, Lt, LtEq, NotEq}
import org.apache.parquet.hadoop.{ParquetFileReader, ParquetInputFormat, ParquetOutputFormat}
import org.apache.parquet.hadoop.util.HadoopInputFile
import org.scalatest.Ignore

import java.sql.{Date, Timestamp}
import java.time.LocalDate

import scala.reflect.ClassTag
import scala.reflect.runtime.universe.TypeTag

@@ -328,7 +328,38 @@ abstract class GltuenParquetFilterSuite extends ParquetFilterSuite with GlutenSQ
}

@ExtendedSQLTest
class GlutenParquetV1FilterSuite extends GltuenParquetFilterSuite with GlutenSQLTestsBaseTrait {
class GlutenParquetV1FilterSuiteIOThreadsOn extends GlutenParquetV1FilterSuite {
  override def sparkConf: SparkConf =
    super.sparkConf
      .set(GlutenConfig.COLUMNAR_VELOX_CONNECTOR_IO_THREADS.key, "2")
}

@ExtendedSQLTest
@Ignore // https://github.com/apache/incubator-gluten/issues/7174
class GlutenParquetV1FilterSuiteIOThreadsOff extends GlutenParquetV1FilterSuite {
  override def sparkConf: SparkConf =
    super.sparkConf
      .set(GlutenConfig.COLUMNAR_VELOX_CONNECTOR_IO_THREADS.key, "0")
}

@ExtendedSQLTest
class GlutenParquetV2FilterSuiteIOThreadsOn extends GlutenParquetV2FilterSuite {
  override def sparkConf: SparkConf =
    super.sparkConf
      .set(GlutenConfig.COLUMNAR_VELOX_CONNECTOR_IO_THREADS.key, "2")
}

@ExtendedSQLTest
@Ignore // https://github.com/apache/incubator-gluten/issues/7174
class GlutenParquetV2FilterSuiteIOThreadsOff extends GlutenParquetV2FilterSuite {
  override def sparkConf: SparkConf =
    super.sparkConf
      .set(GlutenConfig.COLUMNAR_VELOX_CONNECTOR_IO_THREADS.key, "0")
}

abstract class GlutenParquetV1FilterSuite
  extends GltuenParquetFilterSuite
  with GlutenSQLTestsBaseTrait {
  // TODO: enable Parquet V2 write path after file source V2 writers are workable.
  override def sparkConf: SparkConf =
    super.sparkConf
@@ -415,8 +446,9 @@ class GlutenParquetV1FilterSuite extends GltuenParquetFilterSuite with GlutenSQL
}
}

@ExtendedSQLTest
class GlutenParquetV2FilterSuite extends GltuenParquetFilterSuite with GlutenSQLTestsBaseTrait {
abstract class GlutenParquetV2FilterSuite
  extends GltuenParquetFilterSuite
  with GlutenSQLTestsBaseTrait {
  // TODO: enable Parquet V2 write path after file source V2 writers are workable.
  override def sparkConf: SparkConf =
    super.sparkConf
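
The IOThreadsOff variants are disabled with ScalaTest's class-level @Ignore annotation, which marks every test in the annotated suite as ignored rather than running it. A minimal sketch with a hypothetical suite, not taken from this commit:

import org.scalatest.Ignore
import org.scalatest.funsuite.AnyFunSuite

// @Ignore at class level skips all tests in the suite; the runner still
// reports them, but as ignored, so the suite stays visible in test output.
@Ignore
class ExampleIgnoredSuite extends AnyFunSuite {
  test("disabled until the linked issue is resolved") {
    assert(1 + 1 == 2)
  }
}

Removing the annotation re-enables the suite, which keeps the follow-up tracked in the linked issue to a one-line change per suite.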
