From dafd525afbdb2f4a10ecea46a5a6775456a1f522 Mon Sep 17 00:00:00 2001
From: Yang Zhang
Date: Thu, 15 Aug 2024 14:36:27 +0800
Subject: [PATCH] [VL] Verify empty2null is offloaded when v1writer fallback (#6859)

---
 .../spark/sql/sources/GlutenInsertSuite.scala | 38 ++++++++++++++++++-
 1 file changed, 37 insertions(+), 1 deletion(-)

diff --git a/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/sources/GlutenInsertSuite.scala b/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/sources/GlutenInsertSuite.scala
index 5c60115c5e1d..ca0ada39ceec 100644
--- a/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/sources/GlutenInsertSuite.scala
+++ b/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/sources/GlutenInsertSuite.scala
@@ -16,7 +16,7 @@
  */
 package org.apache.spark.sql.sources
 
-import org.apache.gluten.execution.SortExecTransformer
+import org.apache.gluten.execution.{ProjectExecTransformer, SortExecTransformer}
 import org.apache.gluten.extension.GlutenPlan
 
 import org.apache.spark.SparkConf
@@ -147,6 +147,42 @@ class GlutenInsertSuite
     assert(parts == expectedPartitionNames)
   }
 
+  // Checks that when the write itself is not columnar (asserted below through the absence of
+  // ColumnarWriteFilesExec), the plan under the write command still contains a
+  // ProjectExecTransformer whose project list carries the empty2null expression.
+  testGluten("offload empty2null when v1writes fallback") {
+    // NOTE(review): presumably setting maxRecordsPerFile is what makes the writer fall back;
+    // confirm against the native write offload rules.
+    withSQLConf((SQLConf.MAX_RECORDS_PER_FILE.key, "1000")) {
+      withTable("pt") {
+        spark.sql("CREATE TABLE pt (c1 int) USING PARQUET PARTITIONED BY(p string)")
+
+        val df = spark.sql(s"""
+                              |INSERT OVERWRITE TABLE pt PARTITION(p)
+                              |SELECT c1, c2 as p FROM source
+                              |""".stripMargin)
+
+        val writeFiles = stripAQEPlan(
+          df.queryExecution.executedPlan
+            .asInstanceOf[CommandResultExec]
+            .commandPhysicalPlan).children.head
+        assert(!writeFiles.isInstanceOf[ColumnarWriteFilesExec])
+        assert(writeFiles.exists(_.isInstanceOf[ProjectExecTransformer]))
+        val projectExecTransformer = writeFiles
+          .find(_.isInstanceOf[ProjectExecTransformer])
+          .get
+          .asInstanceOf[ProjectExecTransformer]
+        // Assert the match: a bare find(...) result would be discarded and verify nothing.
+        assert(projectExecTransformer.projectList.exists(_.toString().contains("empty2null")))
+
+        // The partition column should never be empty
+        checkAnswer(
+          spark.sql("SELECT * FROM pt"),
+          spark.sql("SELECT c1, if(c2 = '', null, c2) FROM source"))
+      }
+    }
+  }
+
   testGluten("remove v1writes sort and project") {
     // Only string type has empty2null expression
     withTable("pt") {