[GLUTEN-3559] [VL] Fix unit tests in GlutenCSV* suites (#4254)

apache · Jan 3, 2024 · 72cecaa · 72cecaa
1 parent 3660e31
commit 72cecaa
Show file tree

Hide file tree

Showing 2 changed files with 29 additions and 10 deletions.
diff --git a/gluten-ut/spark34/src/test/scala/io/glutenproject/utils/velox/VeloxTestSettings.scala b/gluten-ut/spark34/src/test/scala/io/glutenproject/utils/velox/VeloxTestSettings.scala
@@ -186,17 +186,8 @@ class VeloxTestSettings extends BackendTestSettings {
     // Exception.
     .exclude("column pruning - non-readable file")
   enableSuite[GlutenCSVv1Suite]
-    .exclude("SPARK-23786: warning should be printed if CSV header doesn't conform to schema")
-    .excludeByPrefix("lineSep with 2 chars when multiLine set to")
   enableSuite[GlutenCSVv2Suite]
-    .exclude("SPARK-23786: warning should be printed if CSV header doesn't conform to schema")
-    .excludeByPrefix("lineSep with 2 chars when multiLine set to")
-    .exclude("test for FAILFAST parsing mode")
-    // exception test
-    .exclude("SPARK-39731: Correctly parse dates and timestamps with yyyyMMdd pattern")
   enableSuite[GlutenCSVLegacyTimeParserSuite]
-    .exclude("SPARK-23786: warning should be printed if CSV header doesn't conform to schema")
-    .excludeByPrefix("lineSep with 2 chars when multiLine set to")
   enableSuite[GlutenJsonV1Suite]
     // FIXME: Array direct selection fails
     .exclude("Complex field and type inferring")

diff --git a/...park34/src/test/scala/org/apache/spark/sql/execution/datasources/csv/GlutenCSVSuite.scala b/...park34/src/test/scala/org/apache/spark/sql/execution/datasources/csv/GlutenCSVSuite.scala
@@ -16,7 +16,9 @@
  */
 package org.apache.spark.sql.execution.datasources.csv
 
-import org.apache.spark.SparkConf
+import io.glutenproject.exception.GlutenException
+
+import org.apache.spark.{SparkConf, SparkException}
 import org.apache.spark.sql.{GlutenSQLTestsBaseTrait, Row}
 import org.apache.spark.sql.GlutenTestConstants.GLUTEN_TEST
 import org.apache.spark.sql.internal.SQLConf
@@ -47,6 +49,32 @@ class GlutenCSVv2Suite extends GlutenCSVSuite {
     super.sparkConf
       .set(SQLConf.USE_V1_SOURCE_LIST, "")
 
+  // Names of the tests that this suite replaces with its own versions: each entry below has a
+  // GLUTEN_TEST-prefixed test of the same name defined in this class.
+  // NOTE(review): presumably the base test trait skips tests listed here -- confirm.
+  override def testNameBlackList: Seq[String] = Seq(
+    // replaced by the GLUTEN_TEST-prefixed tests of the same name in this suite
+    "test for FAILFAST parsing mode",
+    "SPARK-39731: Correctly parse dates and timestamps with yyyyMMdd pattern"
+  )
+
+  // Gluten version of the FAILFAST test: reads the cars CSV file with mode=failfast, once with
+  // multiLine off and once with it on, and expects the read to fail on a malformed record.
+  test(GLUTEN_TEST + "test for FAILFAST parsing mode") {
+    Seq(false, true).foreach {
+      multiLine =>
+        // The failure is expected to surface somewhere in load(...).collect() as a SparkException.
+        val exception = intercept[SparkException] {
+          spark.read
+            .format("csv")
+            .option("multiLine", multiLine)
+            .options(Map("header" -> "true", "mode" -> "failfast"))
+            .load(testFile(carsFile))
+            .collect()
+        }
+
+        // The SparkException must wrap a GlutenException as its direct cause.
+        // NOTE(review): presumably because the error originates in the Gluten backend -- confirm.
+        assert(exception.getCause.isInstanceOf[GlutenException])
+        // The message must contain the malformed-record error text together with the bad row.
+        assert(
+          exception.getMessage.contains(
+            "[MALFORMED_RECORD_IN_PARSING] Malformed records are detected in record parsing: " +
+              "[2015,Chevy,Volt,null,null]"))
+    }
+  }
+
   test(GLUTEN_TEST + "SPARK-39731: Correctly parse dates and timestamps with yyyyMMdd pattern") {
     withTempPath {
       path =>