Skip to content

Commit

Permalink
[GLUTEN-3559] [VL] Fix unit tests in GlutenCSV* suites (#4254)
Browse files: browse the repository at this point in the history
  • Loading branch information
vibhaska authored Jan 3, 2024
1 parent 3660e31 commit 72cecaa
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 10 deletions.
Original file line number | Diff line number | Diff line change
Expand Up @@ -186,17 +186,8 @@ class VeloxTestSettings extends BackendTestSettings {
// Exception.
.exclude("column pruning - non-readable file")
// Registrations of the three Gluten CSV suites (v1, v2, legacy time parser).
// Each chained exclude(...) names one test and each excludeByPrefix(...) names a
// test-name prefix; presumably those tests are skipped for this backend — TODO confirm
// against BackendTestSettings.
// NOTE(review): this hunk is shown without +/- markers, so some of the exclusion lines
// below may be the ones this commit removes — verify against the actual file.
enableSuite[GlutenCSVv1Suite]
.exclude("SPARK-23786: warning should be printed if CSV header doesn't conform to schema")
.excludeByPrefix("lineSep with 2 chars when multiLine set to")
// The v2 suite carries the same two entries plus two more tests.
enableSuite[GlutenCSVv2Suite]
.exclude("SPARK-23786: warning should be printed if CSV header doesn't conform to schema")
.excludeByPrefix("lineSep with 2 chars when multiLine set to")
.exclude("test for FAILFAST parsing mode")
// exception test
.exclude("SPARK-39731: Correctly parse dates and timestamps with yyyyMMdd pattern")
// The legacy time parser suite carries the same two entries as the v1 suite.
enableSuite[GlutenCSVLegacyTimeParserSuite]
.exclude("SPARK-23786: warning should be printed if CSV header doesn't conform to schema")
.excludeByPrefix("lineSep with 2 chars when multiLine set to")
enableSuite[GlutenJsonV1Suite]
// FIXME: Array direct selection fails
.exclude("Complex field and type inferring")
Expand Down
Original file line number | Diff line number | Diff line change
Expand Up @@ -16,7 +16,9 @@
*/
package org.apache.spark.sql.execution.datasources.csv

import org.apache.spark.SparkConf
import io.glutenproject.exception.GlutenException

import org.apache.spark.{SparkConf, SparkException}
import org.apache.spark.sql.{GlutenSQLTestsBaseTrait, Row}
import org.apache.spark.sql.GlutenTestConstants.GLUTEN_TEST
import org.apache.spark.sql.internal.SQLConf
Expand Down Expand Up @@ -47,6 +49,32 @@ class GlutenCSVv2Suite extends GlutenCSVSuite {
super.sparkConf
.set(SQLConf.USE_V1_SOURCE_LIST, "")

/**
 * Names of the inherited tests listed for blacklisting in this suite. Both names are
 * registered again in this suite under a GLUTEN_TEST-prefixed name.
 */
override def testNameBlackList: Seq[String] = {
  val failFastParsing = "test for FAILFAST parsing mode"
  val yyyyMMddParsing =
    "SPARK-39731: Correctly parse dates and timestamps with yyyyMMdd pattern"
  Seq(failFastParsing, yyyyMMddParsing)
}

// FAILFAST CSV read of the cars file, checked with multiLine off and on: the read must
// throw a SparkException whose cause is a GlutenException and whose message reports the
// malformed record.
test(GLUTEN_TEST + "test for FAILFAST parsing mode") {
  val readerOptions = Map("header" -> "true", "mode" -> "failfast")
  val expectedMessage =
    "[MALFORMED_RECORD_IN_PARSING] Malformed records are detected in record parsing: " +
      "[2015,Chevy,Volt,null,null]"

  for (multiLine <- Seq(false, true)) {
    // The whole read pipeline stays inside intercept so any SparkException it raises
    // is captured, exactly as a failure during collect() would be.
    val thrown = intercept[SparkException] {
      spark.read
        .format("csv")
        .option("multiLine", multiLine)
        .options(readerOptions)
        .load(testFile(carsFile))
        .collect()
    }

    assert(thrown.getCause.isInstanceOf[GlutenException])
    assert(thrown.getMessage.contains(expectedMessage))
  }
}

test(GLUTEN_TEST + "SPARK-39731: Correctly parse dates and timestamps with yyyyMMdd pattern") {
withTempPath {
path =>
Expand Down

0 comments on commit 72cecaa

Please sign in to comment.