Skip to content

Commit

Permalink
[VL] Fix ORC related failed UT
Browse files Browse the repository at this point in the history
  • Loading branch information
chenxu14 committed Nov 22, 2023
1 parent 5966579 commit fc04d0c
Show file tree
Hide file tree
Showing 5 changed files with 3 additions and 58 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -470,7 +470,7 @@ class TestOperator extends VeloxWholeStageTransformerSuite with AdaptiveSparkPla
checkOperatorMatch[HashAggregateExecTransformer](result)
}

ignore("orc scan") {
test("orc scan") {
val df = spark.read
.format("orc")
.load("../cpp/velox/benchmarks/data/bm_lineitem/orc/lineitem.orc")
Expand Down
4 changes: 2 additions & 2 deletions ep/build-velox/src/get_velox.sh
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@

set -exu

VELOX_REPO=https://github.com/oap-project/velox.git
VELOX_BRANCH=update
VELOX_REPO=https://github.com/chenxu14/velox.git
VELOX_BRANCH=chenxu14_dev
VELOX_HOME=""

# Set when running Gluten on HDFS
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -328,8 +328,6 @@ class VeloxTestSettings extends BackendTestSettings {
// Not useful and time consuming.
.exclude("SPARK-33084: Add jar support Ivy URI in SQL")
.exclude("SPARK-33084: Add jar support Ivy URI in SQL -- jar contains udf class")
// ReaderFactory is not registered for format orc.
.exclude("SPARK-33593: Vector reader got incorrect data with binary partition value")
enableSuite[GlutenDatasetAggregatorSuite]
enableSuite[GlutenDatasetOptimizationSuite]
enableSuite[GlutenDatasetPrimitiveSuite]
Expand Down Expand Up @@ -364,8 +362,6 @@ class VeloxTestSettings extends BackendTestSettings {
.exclude("Return correct results when data columns overlap with partition " +
"columns (nested data)")
.exclude("SPARK-31116: Select nested schema with case insensitive mode")
// ReaderFactory is not registered for format orc.
.exclude("SPARK-15474 Write and read back non-empty schema with empty dataframe - orc")
.exclude("SPARK-23271 empty RDD when saved should write a metadata only file - orc")
.exclude("SPARK-22146 read files containing special characters using orc")
.exclude("Do not use cache on overwrite")
Expand Down Expand Up @@ -407,13 +403,11 @@ class VeloxTestSettings extends BackendTestSettings {
enableSuite[GlutenOrcPartitionDiscoverySuite]
.exclude("read partitioned table - normal case")
.exclude("read partitioned table - with nulls")
.disableByReason("Blocked by ORC Velox upstream not ready")
enableSuite[GlutenOrcV1PartitionDiscoverySuite]
.exclude("read partitioned table - normal case")
.exclude("read partitioned table - with nulls")
.exclude("read partitioned table - partition key included in orc file")
.exclude("read partitioned table - with nulls and partition keys are included in Orc file")
.disableByReason("Blocked by ORC Velox upstream not ready")
enableSuite[GlutenOrcV1QuerySuite]
// Rewrite to disable Spark's columnar reader.
.exclude("Simple selection form ORC table")
Expand Down Expand Up @@ -456,7 +450,6 @@ class VeloxTestSettings extends BackendTestSettings {
.exclude("SPARK-37463: read/write Timestamp ntz to Orc with different time zone")
.exclude("SPARK-39381: Make vectorized orc columar writer batch size configurable")
.exclude("SPARK-39830: Reading ORC table that requires type promotion may throw AIOOBE")
.disableByReason("Blocked by ORC Velox upstream not ready")
enableSuite[GlutenOrcV2QuerySuite]
.exclude("Read/write binary data")
.exclude("Read/write all types with non-primitive type")
Expand Down Expand Up @@ -498,7 +491,6 @@ class VeloxTestSettings extends BackendTestSettings {
.exclude("SPARK-5309 strings stored using dictionary compression in orc")
// For exception test.
.exclude("SPARK-20728 Make ORCFileFormat configurable between sql/hive and sql/core")
.disableByReason("Blocked by ORC Velox upstream not ready")
enableSuite[GlutenOrcSourceSuite]
// Rewrite to disable Spark's columnar reader.
.exclude("SPARK-31238: compatibility with Spark 2.4 in reading dates")
Expand All @@ -516,7 +508,6 @@ class VeloxTestSettings extends BackendTestSettings {
.exclude("Gluten - SPARK-31238: compatibility with Spark 2.4 in reading dates")
.exclude("Gluten - SPARK-31238, SPARK-31423: rebasing dates in write")
.exclude("Gluten - SPARK-34862: Support ORC vectorized reader for nested column")
.disableByReason("Blocked by ORC Velox upstream not ready")
enableSuite[GlutenOrcV1FilterSuite]
.exclude("SPARK-32622: case sensitivity in predicate pushdown")
enableSuite[GlutenOrcV1SchemaPruningSuite]
Expand Down Expand Up @@ -659,7 +650,6 @@ class VeloxTestSettings extends BackendTestSettings {
.exclude("Spark vectorized reader - with partition data column - SPARK-38977: schema pruning with correlated NOT IN subquery")
.exclude("Non-vectorized reader - without partition data column - SPARK-38977: schema pruning with correlated NOT IN subquery")
.exclude("Non-vectorized reader - with partition data column - SPARK-38977: schema pruning with correlated NOT IN subquery")
.disableByReason("Blocked by ORC Velox upstream not ready")
enableSuite[GlutenOrcV2SchemaPruningSuite]
.exclude(
"Spark vectorized reader - without partition data column - select only top-level fields")
Expand Down Expand Up @@ -795,7 +785,6 @@ class VeloxTestSettings extends BackendTestSettings {
.exclude("Spark vectorized reader - with partition data column - SPARK-38977: schema pruning with correlated NOT IN subquery")
.exclude("Non-vectorized reader - without partition data column - SPARK-38977: schema pruning with correlated NOT IN subquery")
.exclude("Non-vectorized reader - with partition data column - SPARK-38977: schema pruning with correlated NOT IN subquery")
.disableByReason("Blocked by ORC Velox upstream not ready")
enableSuite[GlutenParquetColumnIndexSuite]
// Rewrite by just removing test timestamp.
.exclude("test reading unaligned pages - test all types")
Expand Down Expand Up @@ -925,7 +914,6 @@ class VeloxTestSettings extends BackendTestSettings {
// Unsupported compression codec.
.exclude("write and read - file source parquet - codec: lz4")
enableSuite[GlutenOrcCodecSuite]
.disableByReason("Blocked by ORC Velox upstream not ready")
enableSuite[GlutenFileSourceStrategySuite]
// Plan comparison.
.exclude("partitioned table - after scan filters")
Expand Down Expand Up @@ -959,7 +947,6 @@ class VeloxTestSettings extends BackendTestSettings {
.exclude("hide a nested column in the middle of the leaf struct column")
.exclude("hide a nested column at the end of the middle struct column")
.exclude("hide a nested column in the middle of the middle struct column")
.disableByReason("Blocked by ORC Velox upstream not ready")
enableSuite[GlutenVectorizedOrcReadSchemaSuite]
// Rewrite to disable Spark's vectorized reading.
.exclude("change column position")
Expand All @@ -981,7 +968,6 @@ class VeloxTestSettings extends BackendTestSettings {
.exclude("change column type from float to double")
.exclude("Gluten - read byte, int, short, long together")
.exclude("Gluten - read float and double together")
.disableByReason("Blocked by ORC Velox upstream not ready")
enableSuite[GlutenMergedOrcReadSchemaSuite]
.exclude("append column into middle")
.exclude("add a nested column at the end of the leaf struct column")
Expand All @@ -999,7 +985,6 @@ class VeloxTestSettings extends BackendTestSettings {
.exclude("read byte, int, short, long together")
.exclude("change column type from float to double")
.exclude("read float and double together")
.disableByReason("Blocked by ORC Velox upstream not ready")
enableSuite[GlutenParquetReadSchemaSuite]
enableSuite[GlutenVectorizedParquetReadSchemaSuite]
enableSuite[GlutenMergedParquetReadSchemaSuite]
Expand Down Expand Up @@ -1089,7 +1074,5 @@ class VeloxTestSettings extends BackendTestSettings {
.exclude("SPARK-33687: analyze all tables in a specific database")
enableSuite[FallbackStrategiesSuite]
enableSuite[GlutenHiveSQLQuerySuite]
// ReaderFactory is not registered for format orc.
.exclude("hive orc scan")
}
// scalastyle:on line.size.limit
Original file line number Diff line number Diff line change
Expand Up @@ -229,13 +229,11 @@ class VeloxTestSettings extends BackendTestSettings {
enableSuite[GlutenOrcPartitionDiscoverySuite]
.exclude("read partitioned table - normal case")
.exclude("read partitioned table - with nulls")
.disableByReason("Blocked by ORC Velox upstream not ready")
enableSuite[GlutenOrcV1PartitionDiscoverySuite]
.exclude("read partitioned table - normal case")
.exclude("read partitioned table - with nulls")
.exclude("read partitioned table - partition key included in orc file")
.exclude("read partitioned table - with nulls and partition keys are included in Orc file")
.disableByReason("Blocked by ORC Velox upstream not ready")
enableSuite[GlutenOrcV1QuerySuite]
// Rewrite to disable Spark's columnar reader.
.exclude("Simple selection form ORC table")
Expand Down Expand Up @@ -278,7 +276,6 @@ class VeloxTestSettings extends BackendTestSettings {
.exclude("SPARK-37463: read/write Timestamp ntz to Orc with different time zone")
.exclude("SPARK-39381: Make vectorized orc columar writer batch size configurable")
.exclude("SPARK-39830: Reading ORC table that requires type promotion may throw AIOOBE")
.disableByReason("Blocked by ORC Velox upstream not ready")
enableSuite[GlutenOrcV2QuerySuite]
.exclude("Read/write binary data")
.exclude("Read/write all types with non-primitive type")
Expand Down Expand Up @@ -320,7 +317,6 @@ class VeloxTestSettings extends BackendTestSettings {
.exclude("SPARK-5309 strings stored using dictionary compression in orc")
// For exception test.
.exclude("SPARK-20728 Make ORCFileFormat configurable between sql/hive and sql/core")
.disableByReason("Blocked by ORC Velox upstream not ready")
enableSuite[GlutenOrcSourceSuite]
// Rewrite to disable Spark's columnar reader.
.exclude("SPARK-31238: compatibility with Spark 2.4 in reading dates")
Expand All @@ -344,7 +340,6 @@ class VeloxTestSettings extends BackendTestSettings {
// rewrite
.exclude("SPARK-36931: Support reading and writing ANSI intervals (spark.sql.orc.enableVectorizedReader=true, spark.sql.orc.enableNestedColumnVectorizedReader=true)")
.exclude("SPARK-36931: Support reading and writing ANSI intervals (spark.sql.orc.enableVectorizedReader=true, spark.sql.orc.enableNestedColumnVectorizedReader=false)")
.disableByReason("Blocked by ORC Velox upstream not ready")
enableSuite[GlutenOrcV1FilterSuite]
.exclude("SPARK-32622: case sensitivity in predicate pushdown")
enableSuite[GlutenOrcV1SchemaPruningSuite]
Expand Down Expand Up @@ -487,7 +482,6 @@ class VeloxTestSettings extends BackendTestSettings {
.exclude("Spark vectorized reader - with partition data column - SPARK-38977: schema pruning with correlated NOT IN subquery")
.exclude("Non-vectorized reader - without partition data column - SPARK-38977: schema pruning with correlated NOT IN subquery")
.exclude("Non-vectorized reader - with partition data column - SPARK-38977: schema pruning with correlated NOT IN subquery")
.disableByReason("Blocked by ORC Velox upstream not ready")
enableSuite[GlutenOrcV2SchemaPruningSuite]
.exclude(
"Spark vectorized reader - without partition data column - select only top-level fields")
Expand Down Expand Up @@ -623,7 +617,6 @@ class VeloxTestSettings extends BackendTestSettings {
.exclude("Spark vectorized reader - with partition data column - SPARK-38977: schema pruning with correlated NOT IN subquery")
.exclude("Non-vectorized reader - without partition data column - SPARK-38977: schema pruning with correlated NOT IN subquery")
.exclude("Non-vectorized reader - with partition data column - SPARK-38977: schema pruning with correlated NOT IN subquery")
.disableByReason("Blocked by ORC Velox upstream not ready")
enableSuite[GlutenParquetColumnIndexSuite]
// Rewrite by just removing test timestamp.
.exclude("test reading unaligned pages - test all types")
Expand Down Expand Up @@ -788,14 +781,11 @@ class VeloxTestSettings extends BackendTestSettings {
enableSuite[GlutenParquetV1AggregatePushDownSuite]
enableSuite[GlutenParquetV2AggregatePushDownSuite]
enableSuite[GlutenOrcV1AggregatePushDownSuite]
.disableByReason("Blocked by ORC Velox upstream not ready")
enableSuite[GlutenOrcV2AggregatePushDownSuite]
.disableByReason("Blocked by ORC Velox upstream not ready")
enableSuite[GlutenParquetCodecSuite]
// Unsupported compression codec.
.exclude("write and read - file source parquet - codec: lz4")
enableSuite[GlutenOrcCodecSuite]
.disableByReason("Blocked by ORC Velox upstream not ready")
enableSuite[GlutenFileSourceStrategySuite]
// Plan comparison.
.exclude("partitioned table - after scan filters")
Expand Down Expand Up @@ -829,7 +819,6 @@ class VeloxTestSettings extends BackendTestSettings {
.exclude("hide a nested column in the middle of the leaf struct column")
.exclude("hide a nested column at the end of the middle struct column")
.exclude("hide a nested column in the middle of the middle struct column")
.disableByReason("Blocked by ORC Velox upstream not ready")
enableSuite[GlutenVectorizedOrcReadSchemaSuite]
// Rewrite to disable Spark's vectorized reading.
.exclude("change column position")
Expand All @@ -851,7 +840,6 @@ class VeloxTestSettings extends BackendTestSettings {
.exclude("change column type from float to double")
.exclude("Gluten - read byte, int, short, long together")
.exclude("Gluten - read float and double together")
.disableByReason("Blocked by ORC Velox upstream not ready")
enableSuite[GlutenMergedOrcReadSchemaSuite]
.exclude("append column into middle")
.exclude("add a nested column at the end of the leaf struct column")
Expand All @@ -869,7 +857,6 @@ class VeloxTestSettings extends BackendTestSettings {
.exclude("read byte, int, short, long together")
.exclude("change column type from float to double")
.exclude("read float and double together")
.disableByReason("Blocked by ORC Velox upstream not ready")
enableSuite[GlutenParquetReadSchemaSuite]
enableSuite[GlutenVectorizedParquetReadSchemaSuite]
enableSuite[GlutenMergedParquetReadSchemaSuite]
Expand Down Expand Up @@ -1099,8 +1086,6 @@ class VeloxTestSettings extends BackendTestSettings {
.exclude("SPARK-31116: Select nested schema with case insensitive mode")
// exclude as original metric not correct when task offloaded to velox
.exclude("SPARK-37585: test input metrics for DSV2 with output limits")
// ReaderFactory is not registered for format orc.
.exclude("SPARK-15474 Write and read back non-empty schema with empty dataframe - orc")
.exclude("SPARK-23271 empty RDD when saved should write a metadata only file - orc")
.exclude("SPARK-22146 read files containing special characters using orc")
.exclude("SPARK-30362: test input metrics for DSV2")
Expand Down Expand Up @@ -1152,8 +1137,6 @@ class VeloxTestSettings extends BackendTestSettings {
// Not useful and time consuming.
.exclude("SPARK-33084: Add jar support Ivy URI in SQL")
.exclude("SPARK-33084: Add jar support Ivy URI in SQL -- jar contains udf class")
// ReaderFactory is not registered for format orc.
.exclude("SPARK-33593: Vector reader got incorrect data with binary partition value")
enableSuite[GlutenSQLQueryTestSuite]
enableSuite[GlutenStatisticsCollectionSuite]
.exclude("SPARK-33687: analyze all tables in a specific database")
Expand All @@ -1170,8 +1153,6 @@ class VeloxTestSettings extends BackendTestSettings {
enableSuite[GlutenXPathFunctionsSuite]
enableSuite[GlutenFallbackSuite]
enableSuite[GlutenHiveSQLQuerySuite]
// ReaderFactory is not registered for format orc.
.exclude("hive orc scan")
enableSuite[GlutenImplicitsTest]
}
// scalastyle:on line.size.limit
Loading

0 comments on commit fc04d0c

Please sign in to comment.