Skip to content

Commit

Permalink
[VL] Verify empty2null is offloaded when v1writer fallback (#6859)
Browse files Browse the repository at this point in the history
  • Loading branch information
Yohahaha authored Aug 15, 2024
1 parent 2eb38cf commit b086734
Showing 1 changed file with 31 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
*/
package org.apache.spark.sql.sources

import org.apache.gluten.execution.SortExecTransformer
import org.apache.gluten.execution.{ProjectExecTransformer, SortExecTransformer}
import org.apache.gluten.extension.GlutenPlan

import org.apache.spark.SparkConf
Expand Down Expand Up @@ -147,6 +147,36 @@ class GlutenInsertSuite
assert(parts == expectedPartitionNames)
}

testGluten("offload empty2null when v1writes fallback") {
withSQLConf((SQLConf.MAX_RECORDS_PER_FILE.key, "1000")) {
withTable("pt") {
spark.sql("CREATE TABLE pt (c1 int) USING PARQUET PARTITIONED BY(p string)")

val df = spark.sql(s"""
|INSERT OVERWRITE TABLE pt PARTITION(p)
|SELECT c1, c2 as p FROM source
|""".stripMargin)

val writeFiles = stripAQEPlan(
df.queryExecution.executedPlan
.asInstanceOf[CommandResultExec]
.commandPhysicalPlan).children.head
assert(!writeFiles.isInstanceOf[ColumnarWriteFilesExec])
assert(writeFiles.exists(_.isInstanceOf[ProjectExecTransformer]))
val projectExecTransformer = writeFiles
.find(_.isInstanceOf[ProjectExecTransformer])
.get
.asInstanceOf[ProjectExecTransformer]
projectExecTransformer.projectList.find(_.toString().contains("empty2null"))

// The partition column should never be empty
checkAnswer(
spark.sql("SELECT * FROM pt"),
spark.sql("SELECT c1, if(c2 = '', null, c2) FROM source"))
}
}
}

testGluten("remove v1writes sort and project") {
// Only string type has empty2null expression
withTable("pt") {
Expand Down

0 comments on commit b086734

Please sign in to comment.