diff --git a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxBackend.scala b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxBackend.scala index 1da1b6df3f384..40188fe448f12 100644 --- a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxBackend.scala +++ b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxBackend.scala @@ -99,7 +99,23 @@ object VeloxBackendSettings extends BackendSettingsApi { } format match { - case ParquetReadFormat | DwrfReadFormat => ValidationResult.succeeded + case ParquetReadFormat => + val typeValidator: PartialFunction[StructField, String] = { + // Parquet scan of nested array with struct/array as element type is not fully + // verified in Velox. + case StructField(_, arrayType: ArrayType, _, _) + if arrayType.elementType.isInstanceOf[StructType] => + "StructType as element in ArrayType" + case StructField(_, arrayType: ArrayType, _, _) + if arrayType.elementType.isInstanceOf[ArrayType] => + "ArrayType as element in ArrayType" + // Parquet timestamp is not fully supported yet + case StructField(_, TimestampType, _, _) + if GlutenConfig.getConf.forceParquetTimestampTypeScanFallbackEnabled => + "TimestampType" + } + validateTypes(typeValidator) + case DwrfReadFormat => ValidationResult.succeeded case OrcReadFormat => if (!GlutenConfig.getConf.veloxOrcScanEnabled) { ValidationResult.failed(s"Velox ORC scan is turned off.") diff --git a/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala b/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala index 78ea77129ae9a..e064f2afc9d7a 100644 --- a/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala +++ b/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala @@ -899,10 +899,6 @@ class VeloxTestSettings extends BackendTestSettings { enableSuite[GlutenParquetV1PartitionDiscoverySuite] enableSuite[GlutenParquetV2PartitionDiscoverySuite] enableSuite[GlutenParquetProtobufCompatibilitySuite] - // Velox issue - .exclude("unannotated array of struct with unannotated array") - .exclude("struct with unannotated array") - .exclude("unannotated array of struct") enableSuite[GlutenParquetV1QuerySuite] // Unsupport spark.sql.files.ignoreCorruptFiles. .exclude("Enabling/disabling ignoreCorruptFiles") @@ -946,7 +942,6 @@ class VeloxTestSettings extends BackendTestSettings { enableSuite[GlutenParquetThriftCompatibilitySuite] // Rewrite for file locating. .exclude("Read Parquet file generated by parquet-thrift") - .exclude("SPARK-10136 list of primitive list") enableSuite[GlutenFileTableSuite] enableSuite[GlutenBucketingUtilsSuite] enableSuite[GlutenDataSourceStrategySuite] diff --git a/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala b/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala index e0dd1a7b4f63f..6c21fc7cc2ba1 100644 --- a/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala +++ b/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala @@ -717,10 +717,6 @@ class VeloxTestSettings extends BackendTestSettings { enableSuite[GlutenParquetV1PartitionDiscoverySuite] enableSuite[GlutenParquetV2PartitionDiscoverySuite] enableSuite[GlutenParquetProtobufCompatibilitySuite] - // Velox issue - .exclude("unannotated array of struct with unannotated array") - .exclude("struct with unannotated array") - .exclude("unannotated array of struct") enableSuite[GlutenParquetV1QuerySuite] // Unsupport spark.sql.files.ignoreCorruptFiles. .exclude("Enabling/disabling ignoreCorruptFiles") @@ -766,7 +762,6 @@ class VeloxTestSettings extends BackendTestSettings { enableSuite[GlutenParquetThriftCompatibilitySuite] // Rewrite for file locating. .exclude("Read Parquet file generated by parquet-thrift") - .exclude("SPARK-10136 list of primitive list") enableSuite[GlutenParquetVectorizedSuite] enableSuite[GlutenTextV1Suite] enableSuite[GlutenTextV2Suite] diff --git a/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala b/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala index c8dbecc97b229..95a5fe107f474 100644 --- a/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala +++ b/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala @@ -701,10 +701,6 @@ class VeloxTestSettings extends BackendTestSettings { enableSuite[GlutenParquetV1PartitionDiscoverySuite] enableSuite[GlutenParquetV2PartitionDiscoverySuite] enableSuite[GlutenParquetProtobufCompatibilitySuite] - // Velox issue - .exclude("unannotated array of struct with unannotated array") - .exclude("struct with unannotated array") - .exclude("unannotated array of struct") enableSuite[GlutenParquetV1QuerySuite] // Unsupport spark.sql.files.ignoreCorruptFiles. .exclude("Enabling/disabling ignoreCorruptFiles") @@ -760,7 +756,6 @@ class VeloxTestSettings extends BackendTestSettings { enableSuite[GlutenParquetThriftCompatibilitySuite] // Rewrite for file locating. .exclude("Read Parquet file generated by parquet-thrift") - .exclude("SPARK-10136 list of primitive list") enableSuite[GlutenParquetVectorizedSuite] enableSuite[GlutenTextV1Suite] enableSuite[GlutenTextV2Suite] diff --git a/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala b/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala index 672f186022b73..e8d8730e93663 100644 --- a/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala +++ b/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala @@ -706,10 +706,6 @@ class VeloxTestSettings extends BackendTestSettings { enableSuite[GlutenParquetV1PartitionDiscoverySuite] enableSuite[GlutenParquetV2PartitionDiscoverySuite] enableSuite[GlutenParquetProtobufCompatibilitySuite] - // Velox issue - .exclude("unannotated array of struct with unannotated array") - .exclude("struct with unannotated array") - .exclude("unannotated array of struct") enableSuite[GlutenParquetV1QuerySuite] .exclude("row group skipping doesn't overflow when reading into larger type") // Unsupport spark.sql.files.ignoreCorruptFiles. @@ -767,7 +763,6 @@ class VeloxTestSettings extends BackendTestSettings { enableSuite[GlutenParquetThriftCompatibilitySuite] // Rewrite for file locating. .exclude("Read Parquet file generated by parquet-thrift") - .exclude("SPARK-10136 list of primitive list") enableSuite[GlutenParquetVectorizedSuite] enableSuite[GlutenTextV1Suite] enableSuite[GlutenTextV2Suite]