Skip to content

Commit

Permalink
Fix test failure
Browse files Browse the repository at this point in the history
  • Loading branch information
PHILO-HE committed Oct 13, 2023
1 parent 86028c1 commit 64b78fb
Show file tree
Hide file tree
Showing 4 changed files with 284 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -211,6 +211,10 @@ class VeloxTestSettings extends BackendTestSettings {
.exclude("to_unix_timestamp")
// Unsupported format: yyyy-MM-dd HH:mm:ss.SSS
.exclude("SPARK-33498: GetTimestamp,UnixTimestamp,ToUnixTimestamp with parseError")
// Not supported.
.exclude("to_timestamp exception mode")
// Timezone is not correctly set. TODO: will investigate it.
.exclude("SPARK-31896: Handle am-pm timestamp parsing when hour is missing")
enableSuite[GlutenDecimalExpressionSuite]
enableSuite[GlutenStringFunctionsSuite]
enableSuite[GlutenRegexpExpressionsSuite]
Expand Down Expand Up @@ -252,6 +256,14 @@ class VeloxTestSettings extends BackendTestSettings {
// The below two are replaced by two modified versions.
.exclude("unix_timestamp")
.exclude("to_unix_timestamp")
// Replaced.
.exclude("function to_date")
// Replaced.
.exclude("to_timestamp")
// Not supported.
.exclude("to_timestamp with microseconds precision")
// Not supported.
.exclude("SPARK-30668: use legacy timestamp parser in to_timestamp")
enableSuite[GlutenDataFrameFunctionsSuite]
// blocked by Velox-5768
.exclude("aggregate function - array for primitive type containing null")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
*/
package org.apache.spark.sql

import org.apache.spark.SparkUpgradeException
import org.apache.spark.sql.catalyst.util.DateTimeUtils
import org.apache.spark.sql.functions._
import org.apache.spark.sql.internal.SQLConf
Expand Down Expand Up @@ -206,4 +207,134 @@ class GlutenDateFunctionsSuite extends DateFunctionsSuite with GlutenSQLTestsTra
}
}
}

// Just exclude a test case that expects an exception.
test(GlutenTestConstants.GLUTEN_TEST + "function to_date") {
val d1 = Date.valueOf("2015-07-22")
val d2 = Date.valueOf("2015-07-01")
val d3 = Date.valueOf("2014-12-31")
val t1 = Timestamp.valueOf("2015-07-22 10:00:00")
val t2 = Timestamp.valueOf("2014-12-31 23:59:59")
val t3 = Timestamp.valueOf("2014-12-31 23:59:59")
val s1 = "2015-07-22 10:00:00"
val s2 = "2014-12-31"
val s3 = "2014-31-12"
val df = Seq((d1, t1, s1), (d2, t2, s2), (d3, t3, s3)).toDF("d", "t", "s")

checkAnswer(
df.select(to_date(col("t"))),
Seq(
Row(Date.valueOf("2015-07-22")),
Row(Date.valueOf("2014-12-31")),
Row(Date.valueOf("2014-12-31"))))
checkAnswer(
df.select(to_date(col("d"))),
Seq(
Row(Date.valueOf("2015-07-22")),
Row(Date.valueOf("2015-07-01")),
Row(Date.valueOf("2014-12-31"))))
checkAnswer(
df.select(to_date(col("s"))),
Seq(Row(Date.valueOf("2015-07-22")), Row(Date.valueOf("2014-12-31")), Row(null)))

checkAnswer(
df.selectExpr("to_date(t)"),
Seq(
Row(Date.valueOf("2015-07-22")),
Row(Date.valueOf("2014-12-31")),
Row(Date.valueOf("2014-12-31"))))
checkAnswer(
df.selectExpr("to_date(d)"),
Seq(
Row(Date.valueOf("2015-07-22")),
Row(Date.valueOf("2015-07-01")),
Row(Date.valueOf("2014-12-31"))))
checkAnswer(
df.selectExpr("to_date(s)"),
Seq(Row(Date.valueOf("2015-07-22")), Row(Date.valueOf("2014-12-31")), Row(null)))

// now with format
checkAnswer(
df.select(to_date(col("t"), "yyyy-MM-dd")),
Seq(
Row(Date.valueOf("2015-07-22")),
Row(Date.valueOf("2014-12-31")),
Row(Date.valueOf("2014-12-31"))))
checkAnswer(
df.select(to_date(col("d"), "yyyy-MM-dd")),
Seq(
Row(Date.valueOf("2015-07-22")),
Row(Date.valueOf("2015-07-01")),
Row(Date.valueOf("2014-12-31"))))
val confKey = SQLConf.LEGACY_TIME_PARSER_POLICY.key
withSQLConf(confKey -> "corrected") {
checkAnswer(
df.select(to_date(col("s"), "yyyy-MM-dd")),
Seq(Row(null), Row(Date.valueOf("2014-12-31")), Row(null)))
}

// Different handling case between Velox and Spark.
// withSQLConf(confKey -> "exception") {
// checkExceptionMessage(df.select(to_date(col("s"), "yyyy-MM-dd")))
// }

// now switch format
checkAnswer(
df.select(to_date(col("s"), "yyyy-dd-MM")),
Seq(Row(null), Row(null), Row(Date.valueOf("2014-12-31"))))

// invalid format
checkAnswer(df.select(to_date(col("s"), "yyyy-hh-MM")), Seq(Row(null), Row(null), Row(null)))
val e = intercept[SparkUpgradeException](df.select(to_date(col("s"), "yyyy-dd-aa")).collect())
assert(e.getCause.isInstanceOf[IllegalArgumentException])
assert(e.getMessage.contains("You may get a different result due to the upgrading of Spark"))

// February
val x1 = "2016-02-29"
val x2 = "2017-02-29"
val df1 = Seq(x1, x2).toDF("x")
checkAnswer(df1.select(to_date(col("x"))), Row(Date.valueOf("2016-02-29")) :: Row(null) :: Nil)
}

// Time format with .S is not supported by Velox, e.g, 2015-07-24 10:00:00.5. Exclude such case.
test(GlutenTestConstants.GLUTEN_TEST + "to_timestamp") {
Seq("legacy", "corrected").foreach {
legacyParserPolicy =>
withSQLConf(SQLConf.LEGACY_TIME_PARSER_POLICY.key -> legacyParserPolicy) {
val date1 = Date.valueOf("2015-07-24")
val date2 = Date.valueOf("2015-07-25")
val ts_date1 = Timestamp.valueOf("2015-07-24 00:00:00")
val ts_date2 = Timestamp.valueOf("2015-07-25 00:00:00")
val ts1 = Timestamp.valueOf("2015-07-24 10:00:00")
val ts2 = Timestamp.valueOf("2015-07-25 02:02:02")
val s1 = "2015/07/24 10:00:00.5"
val s2 = "2015/07/25 02:02:02.6"
// val ts1m = Timestamp.valueOf("2015-07-24 10:00:00.5")
// val ts2m = Timestamp.valueOf("2015-07-25 02:02:02.6")
val ss1 = "2015-07-24 10:00:00"
val ss2 = "2015-07-25 02:02:02"
val fmt = "yyyy/MM/dd HH:mm:ss.S"
val df = Seq((date1, ts1, s1, ss1), (date2, ts2, s2, ss2)).toDF("d", "ts", "s", "ss")

checkAnswer(
df.select(to_timestamp(col("ss"))),
df.select(timestamp_seconds(unix_timestamp(col("ss")))))
checkAnswer(df.select(to_timestamp(col("ss"))), Seq(Row(ts1), Row(ts2)))
// if (legacyParserPolicy == "legacy") {
// // In Spark 2.4 and earlier, to_timestamp() parses in seconds precision and cuts off
// // the fractional part of seconds. The behavior was changed by SPARK-27438.
// val legacyFmt = "yyyy/MM/dd HH:mm:ss"
// checkAnswer(df.select(to_timestamp(col("s"), legacyFmt)), Seq(
// Row(ts1), Row(ts2)))
// } else {
// checkAnswer(df.select(to_timestamp(col("s"), fmt)), Seq(
// Row(ts1m), Row(ts2m)))
// }
checkAnswer(df.select(to_timestamp(col("ts"), fmt)), Seq(Row(ts1), Row(ts2)))
checkAnswer(
df.select(to_timestamp(col("d"), "yyyy-MM-dd")),
Seq(Row(ts_date1), Row(ts_date2)))
}
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,10 @@ class VeloxTestSettings extends BackendTestSettings {
.exclude("to_unix_timestamp")
// Unsupported format: yyyy-MM-dd HH:mm:ss.SSS
.exclude("SPARK-33498: GetTimestamp,UnixTimestamp,ToUnixTimestamp with parseError")
// Not supported.
.exclude("to_timestamp exception mode")
// Timezone is not correctly set. TODO: will investigate it.
.exclude("SPARK-31896: Handle am-pm timestamp parsing when hour is missing")
enableSuite[GlutenDecimalExpressionSuite]
enableSuite[GlutenHashExpressionsSuite]
enableSuite[GlutenIntervalExpressionsSuite]
Expand Down Expand Up @@ -1051,6 +1055,14 @@ class VeloxTestSettings extends BackendTestSettings {
// The below two are replaced by two modified versions.
.exclude("unix_timestamp")
.exclude("to_unix_timestamp")
// Replaced.
.exclude("function to_date")
// Replaced.
.exclude("to_timestamp")
// Not supported.
.exclude("to_timestamp with microseconds precision")
// Not supported.
.exclude("SPARK-30668: use legacy timestamp parser in to_timestamp")
enableSuite[GlutenDeprecatedAPISuite]
enableSuite[GlutenDynamicPartitionPruningV1SuiteAEOff]
enableSuite[GlutenDynamicPartitionPruningV1SuiteAEOn]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
*/
package org.apache.spark.sql

import org.apache.spark.SparkUpgradeException
import org.apache.spark.sql.GlutenTestConstants.GLUTEN_TEST
import org.apache.spark.sql.catalyst.util.DateTimeUtils
import org.apache.spark.sql.functions._
Expand Down Expand Up @@ -205,4 +206,132 @@ class GlutenDateFunctionsSuite extends DateFunctionsSuite with GlutenSQLTestsTra
}
}
}

test(GLUTEN_TEST + "function to_date") {
val d1 = Date.valueOf("2015-07-22")
val d2 = Date.valueOf("2015-07-01")
val d3 = Date.valueOf("2014-12-31")
val t1 = Timestamp.valueOf("2015-07-22 10:00:00")
val t2 = Timestamp.valueOf("2014-12-31 23:59:59")
val t3 = Timestamp.valueOf("2014-12-31 23:59:59")
val s1 = "2015-07-22 10:00:00"
val s2 = "2014-12-31"
val s3 = "2014-31-12"
val df = Seq((d1, t1, s1), (d2, t2, s2), (d3, t3, s3)).toDF("d", "t", "s")

checkAnswer(
df.select(to_date(col("t"))),
Seq(
Row(Date.valueOf("2015-07-22")),
Row(Date.valueOf("2014-12-31")),
Row(Date.valueOf("2014-12-31"))))
checkAnswer(
df.select(to_date(col("d"))),
Seq(
Row(Date.valueOf("2015-07-22")),
Row(Date.valueOf("2015-07-01")),
Row(Date.valueOf("2014-12-31"))))
checkAnswer(
df.select(to_date(col("s"))),
Seq(Row(Date.valueOf("2015-07-22")), Row(Date.valueOf("2014-12-31")), Row(null)))

checkAnswer(
df.selectExpr("to_date(t)"),
Seq(
Row(Date.valueOf("2015-07-22")),
Row(Date.valueOf("2014-12-31")),
Row(Date.valueOf("2014-12-31"))))
checkAnswer(
df.selectExpr("to_date(d)"),
Seq(
Row(Date.valueOf("2015-07-22")),
Row(Date.valueOf("2015-07-01")),
Row(Date.valueOf("2014-12-31"))))
checkAnswer(
df.selectExpr("to_date(s)"),
Seq(Row(Date.valueOf("2015-07-22")), Row(Date.valueOf("2014-12-31")), Row(null)))

// now with format
checkAnswer(
df.select(to_date(col("t"), "yyyy-MM-dd")),
Seq(
Row(Date.valueOf("2015-07-22")),
Row(Date.valueOf("2014-12-31")),
Row(Date.valueOf("2014-12-31"))))
checkAnswer(
df.select(to_date(col("d"), "yyyy-MM-dd")),
Seq(
Row(Date.valueOf("2015-07-22")),
Row(Date.valueOf("2015-07-01")),
Row(Date.valueOf("2014-12-31"))))
val confKey = SQLConf.LEGACY_TIME_PARSER_POLICY.key
withSQLConf(confKey -> "corrected") {
checkAnswer(
df.select(to_date(col("s"), "yyyy-MM-dd")),
Seq(Row(null), Row(Date.valueOf("2014-12-31")), Row(null)))
}
// Different handling case between Velox and Spark.
// withSQLConf(confKey -> "exception") {
// checkExceptionMessage(df.select(to_date(col("s"), "yyyy-MM-dd")))
// }

// now switch format
checkAnswer(
df.select(to_date(col("s"), "yyyy-dd-MM")),
Seq(Row(null), Row(null), Row(Date.valueOf("2014-12-31"))))

// invalid format
checkAnswer(df.select(to_date(col("s"), "yyyy-hh-MM")), Seq(Row(null), Row(null), Row(null)))
val e = intercept[SparkUpgradeException](df.select(to_date(col("s"), "yyyy-dd-aa")).collect())
assert(e.getCause.isInstanceOf[IllegalArgumentException])
assert(e.getMessage.contains("You may get a different result due to the upgrading to Spark"))

// February
val x1 = "2016-02-29"
val x2 = "2017-02-29"
val df1 = Seq(x1, x2).toDF("x")
checkAnswer(df1.select(to_date(col("x"))), Row(Date.valueOf("2016-02-29")) :: Row(null) :: Nil)
}

// Time format with .S is not supported by Velox, e.g, 2015-07-24 10:00:00.5. Exclude such case.
test(GLUTEN_TEST + "to_timestamp") {
Seq("legacy", "corrected").foreach {
legacyParserPolicy =>
withSQLConf(SQLConf.LEGACY_TIME_PARSER_POLICY.key -> legacyParserPolicy) {
val date1 = Date.valueOf("2015-07-24")
val date2 = Date.valueOf("2015-07-25")
val ts_date1 = Timestamp.valueOf("2015-07-24 00:00:00")
val ts_date2 = Timestamp.valueOf("2015-07-25 00:00:00")
val ts1 = Timestamp.valueOf("2015-07-24 10:00:00")
val ts2 = Timestamp.valueOf("2015-07-25 02:02:02")
val s1 = "2015/07/24 10:00:00.5"
val s2 = "2015/07/25 02:02:02.6"
// val ts1m = Timestamp.valueOf("2015-07-24 10:00:00.5")
// val ts2m = Timestamp.valueOf("2015-07-25 02:02:02.6")
val ss1 = "2015-07-24 10:00:00"
val ss2 = "2015-07-25 02:02:02"
val fmt = "yyyy/MM/dd HH:mm:ss.S"
val df = Seq((date1, ts1, s1, ss1), (date2, ts2, s2, ss2)).toDF("d", "ts", "s", "ss")

checkAnswer(
df.select(to_timestamp(col("ss"))),
df.select(timestamp_seconds(unix_timestamp(col("ss")))))
checkAnswer(df.select(to_timestamp(col("ss"))), Seq(Row(ts1), Row(ts2)))
// if (legacyParserPolicy == "legacy") {
// // In Spark 2.4 and earlier, to_timestamp() parses in seconds precision and cuts off
// // the fractional part of seconds. The behavior was changed by SPARK-27438.
// val legacyFmt = "yyyy/MM/dd HH:mm:ss"
// checkAnswer(df.select(to_timestamp(col("s"), legacyFmt)), Seq(
// Row(ts1), Row(ts2)))
// } else {
// checkAnswer(df.select(to_timestamp(col("s"), fmt)), Seq(
// Row(ts1m), Row(ts2m)))
// }
checkAnswer(df.select(to_timestamp(col("ts"), fmt)), Seq(Row(ts1), Row(ts2)))
checkAnswer(
df.select(to_timestamp(col("d"), "yyyy-MM-dd")),
Seq(Row(ts_date1), Row(ts_date2)))
}
}
}
}

0 comments on commit 64b78fb

Please sign in to comment.