Skip to content

Commit

Permalink
fallback timestamp scan for parquet if necessary
Browse files Browse the repository at this point in the history
  • Loading branch information
yma11 committed Sep 2, 2024
1 parent cc86a9a commit ca52396
Show file tree
Hide file tree
Showing 6 changed files with 18 additions and 22 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,23 @@ object VeloxBackendSettings extends BackendSettingsApi {
}

format match {
case ParquetReadFormat | DwrfReadFormat => ValidationResult.succeeded
case ParquetReadFormat =>
val typeValidator: PartialFunction[StructField, String] = {
// Parquet scan of nested array with struct/array as element type is not fully
// verified in Velox.
case StructField(_, arrayType: ArrayType, _, _)
if arrayType.elementType.isInstanceOf[StructType] =>
"StructType as element in ArrayType"
case StructField(_, arrayType: ArrayType, _, _)
if arrayType.elementType.isInstanceOf[ArrayType] =>
"ArrayType as element in ArrayType"
// Parquet timestamp scan is not fully supported yet; fall back when the force-fallback config is enabled.
case StructField(_, TimestampType, _, _)
if GlutenConfig.getConf.forceParquetTimestampTypeScanFallbackEnabled =>
"TimestampType"
}
validateTypes(typeValidator)
case DwrfReadFormat => ValidationResult.succeeded
case OrcReadFormat =>
if (!GlutenConfig.getConf.veloxOrcScanEnabled) {
ValidationResult.failed(s"Velox ORC scan is turned off.")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -353,7 +353,7 @@ object ExpressionConverter extends SQLConfHelper with Logging {
// Different backends may have different result.
BackendsApiManager.getSparkPlanExecApiInstance.genGetStructFieldTransformer(
substraitExprName,
replaceWithExpressionTransformerInternal(
replaceWithExpressionTransformer0(
getStructField.child,
attributeSeq,
expressionsMap),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -899,10 +899,6 @@ class VeloxTestSettings extends BackendTestSettings {
enableSuite[GlutenParquetV1PartitionDiscoverySuite]
enableSuite[GlutenParquetV2PartitionDiscoverySuite]
enableSuite[GlutenParquetProtobufCompatibilitySuite]
// Velox issue
.exclude("unannotated array of struct with unannotated array")
.exclude("struct with unannotated array")
.exclude("unannotated array of struct")
enableSuite[GlutenParquetV1QuerySuite]
// spark.sql.files.ignoreCorruptFiles is not supported.
.exclude("Enabling/disabling ignoreCorruptFiles")
Expand Down Expand Up @@ -946,7 +942,6 @@ class VeloxTestSettings extends BackendTestSettings {
enableSuite[GlutenParquetThriftCompatibilitySuite]
// Rewrite for file locating.
.exclude("Read Parquet file generated by parquet-thrift")
.exclude("SPARK-10136 list of primitive list")
enableSuite[GlutenFileTableSuite]
enableSuite[GlutenBucketingUtilsSuite]
enableSuite[GlutenDataSourceStrategySuite]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -717,10 +717,6 @@ class VeloxTestSettings extends BackendTestSettings {
enableSuite[GlutenParquetV1PartitionDiscoverySuite]
enableSuite[GlutenParquetV2PartitionDiscoverySuite]
enableSuite[GlutenParquetProtobufCompatibilitySuite]
// Velox issue
.exclude("unannotated array of struct with unannotated array")
.exclude("struct with unannotated array")
.exclude("unannotated array of struct")
enableSuite[GlutenParquetV1QuerySuite]
// spark.sql.files.ignoreCorruptFiles is not supported.
.exclude("Enabling/disabling ignoreCorruptFiles")
Expand Down Expand Up @@ -766,7 +762,6 @@ class VeloxTestSettings extends BackendTestSettings {
enableSuite[GlutenParquetThriftCompatibilitySuite]
// Rewrite for file locating.
.exclude("Read Parquet file generated by parquet-thrift")
.exclude("SPARK-10136 list of primitive list")
enableSuite[GlutenParquetVectorizedSuite]
enableSuite[GlutenTextV1Suite]
enableSuite[GlutenTextV2Suite]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -701,10 +701,6 @@ class VeloxTestSettings extends BackendTestSettings {
enableSuite[GlutenParquetV1PartitionDiscoverySuite]
enableSuite[GlutenParquetV2PartitionDiscoverySuite]
enableSuite[GlutenParquetProtobufCompatibilitySuite]
// Velox issue
.exclude("unannotated array of struct with unannotated array")
.exclude("struct with unannotated array")
.exclude("unannotated array of struct")
enableSuite[GlutenParquetV1QuerySuite]
// spark.sql.files.ignoreCorruptFiles is not supported.
.exclude("Enabling/disabling ignoreCorruptFiles")
Expand Down Expand Up @@ -760,7 +756,6 @@ class VeloxTestSettings extends BackendTestSettings {
enableSuite[GlutenParquetThriftCompatibilitySuite]
// Rewrite for file locating.
.exclude("Read Parquet file generated by parquet-thrift")
.exclude("SPARK-10136 list of primitive list")
enableSuite[GlutenParquetVectorizedSuite]
enableSuite[GlutenTextV1Suite]
enableSuite[GlutenTextV2Suite]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -706,10 +706,6 @@ class VeloxTestSettings extends BackendTestSettings {
enableSuite[GlutenParquetV1PartitionDiscoverySuite]
enableSuite[GlutenParquetV2PartitionDiscoverySuite]
enableSuite[GlutenParquetProtobufCompatibilitySuite]
// Velox issue
.exclude("unannotated array of struct with unannotated array")
.exclude("struct with unannotated array")
.exclude("unannotated array of struct")
enableSuite[GlutenParquetV1QuerySuite]
.exclude("row group skipping doesn't overflow when reading into larger type")
// spark.sql.files.ignoreCorruptFiles is not supported.
Expand Down Expand Up @@ -767,7 +763,6 @@ class VeloxTestSettings extends BackendTestSettings {
enableSuite[GlutenParquetThriftCompatibilitySuite]
// Rewrite for file locating.
.exclude("Read Parquet file generated by parquet-thrift")
.exclude("SPARK-10136 list of primitive list")
enableSuite[GlutenParquetVectorizedSuite]
enableSuite[GlutenTextV1Suite]
enableSuite[GlutenTextV2Suite]
Expand Down

0 comments on commit ca52396

Please sign in to comment.