Skip to content

Commit

Permalink
[GLUTEN-8215][VL] Support cast timestamp to date (#8212)
Browse files Browse the repository at this point in the history
  • Loading branch information
zml1206 authored Dec 17, 2024
1 parent b572715 commit 36f0a8f
Show file tree
Hide file tree
Showing 11 changed files with 388 additions and 13 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -1791,6 +1791,13 @@ class MiscOperatorSuite extends VeloxWholeStageTransformerSuite with AdaptiveSpa
assert(plan2.find(_.isInstanceOf[ProjectExecTransformer]).isDefined)
}

test("cast timestamp to date") {
  // Runs a single-row TIMESTAMP -> DATE cast through runQueryAndCompare and then
  // checks that the resulting plan matches a ProjectExecTransformer.
  val castTimestampToDateSql =
    "select cast(ts as date) from values (timestamp'2024-01-01 00:00:00') as tab(ts)"
  runQueryAndCompare(castTimestampToDateSql) {
    checkGlutenOperatorMatch[ProjectExecTransformer]
  }
}

test("timestamp broadcast join") {
spark.range(0, 5).createOrReplaceTempView("right")
spark.sql("SELECT id, timestamp_micros(id) as ts from right").createOrReplaceTempView("left")
Expand Down
11 changes: 7 additions & 4 deletions cpp/velox/substrait/SubstraitToVeloxPlanValidator.cc
Original file line number Diff line number Diff line change
Expand Up @@ -299,10 +299,13 @@ bool SubstraitToVeloxPlanValidator::validateCast(
case TypeKind::VARBINARY:
LOG_VALIDATION_MSG("Invalid input type in casting: ARRAY/MAP/ROW/VARBINARY.");
return false;
case TypeKind::TIMESTAMP: {
LOG_VALIDATION_MSG("Casting from TIMESTAMP is not supported or has incorrect result.");
return false;
}
case TypeKind::TIMESTAMP:
// Only support cast timestamp to date
if (!toType->isDate()) {
LOG_VALIDATION_MSG(
"Casting from TIMESTAMP to " + toType->toString() + " is not supported or has incorrect result.");
return false;
}
default: {
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -265,6 +265,9 @@ class VeloxTestSettings extends BackendTestSettings {
.exclude("to_timestamp")
// Legacy mode is not supported, assuming this mode is not commonly used.
.exclude("SPARK-30668: use legacy timestamp parser in to_timestamp")
// Legacy mode is not supported and velox getTimestamp function does not throw
// exception when format is "yyyy-dd-aa".
.exclude("function to_date")
enableSuite[GlutenDataFrameFunctionsSuite]
// blocked by Velox-5768
.exclude("aggregate function - array for primitive type containing null")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -248,4 +248,93 @@ class GlutenDateFunctionsSuite extends DateFunctionsSuite with GlutenSQLTestsTra
}
}
}

testGluten("function to_date") {
  // Three input rows, each carrying a date ("d"), a timestamp ("t") and a string ("s").
  val df = Seq(
    (Date.valueOf("2015-07-22"), Timestamp.valueOf("2015-07-22 10:00:00"), "2015-07-22 10:00:00"),
    (Date.valueOf("2015-07-01"), Timestamp.valueOf("2014-12-31 23:59:59"), "2014-12-31"),
    (Date.valueOf("2014-12-31"), Timestamp.valueOf("2014-12-31 23:59:59"), "2014-31-12")
  ).toDF("d", "t", "s")

  // Small helpers for spelling out the expected single-column rows.
  def dateRow(iso: String): Row = Row(Date.valueOf(iso))
  val nullRow = Row(null)

  val expectedFromTimestamp =
    Seq(dateRow("2015-07-22"), dateRow("2014-12-31"), dateRow("2014-12-31"))
  val expectedFromDate =
    Seq(dateRow("2015-07-22"), dateRow("2015-07-01"), dateRow("2014-12-31"))
  val expectedFromString =
    Seq(dateRow("2015-07-22"), dateRow("2014-12-31"), nullRow)

  // Column API, no explicit format.
  checkAnswer(df.select(to_date(col("t"))), expectedFromTimestamp)
  checkAnswer(df.select(to_date(col("d"))), expectedFromDate)
  checkAnswer(df.select(to_date(col("s"))), expectedFromString)

  // SQL expression API, no explicit format.
  checkAnswer(df.selectExpr("to_date(t)"), expectedFromTimestamp)
  checkAnswer(df.selectExpr("to_date(d)"), expectedFromDate)
  checkAnswer(df.selectExpr("to_date(s)"), expectedFromString)

  // Explicit format on the timestamp and date columns.
  checkAnswer(df.select(to_date(col("t"), "yyyy-MM-dd")), expectedFromTimestamp)
  checkAnswer(df.select(to_date(col("d"), "yyyy-MM-dd")), expectedFromDate)

  // Explicit format on the string column under the "corrected" parser policy.
  withSQLConf(SQLConf.LEGACY_TIME_PARSER_POLICY.key -> "corrected") {
    checkAnswer(
      df.select(to_date(col("s"), "yyyy-MM-dd")),
      Seq(nullRow, dateRow("2014-12-31"), nullRow))
  }
  // Disabled on purpose: legacyParserPolicy is not respected by Gluten.
  // withSQLConf(SQLConf.LEGACY_TIME_PARSER_POLICY.key -> "exception") {
  //   checkExceptionMessage(df.select(to_date(col("s"), "yyyy-MM-dd")))
  // }

  // Day and month swapped in the format.
  checkAnswer(
    df.select(to_date(col("s"), "yyyy-dd-MM")),
    Seq(nullRow, nullRow, dateRow("2014-12-31")))

  // Invalid format: every row is expected to be null.
  checkAnswer(df.select(to_date(col("s"), "yyyy-hh-MM")), Seq(nullRow, nullRow, nullRow))
  // Disabled on purpose: velox getTimestamp function does not throw exception when
  // format is "yyyy-dd-aa".
  // val e =
  //   intercept[SparkUpgradeException](df.select(to_date(col("s"), "yyyy-dd-aa")).collect())
  // assert(e.getCause.isInstanceOf[IllegalArgumentException])
  // assert(
  //   e.getMessage.contains("You may get a different result due to the upgrading to Spark"))

  // February: 2016-02-29 is expected to parse, 2017-02-29 is expected to be null.
  val februaryDf = Seq("2016-02-29", "2017-02-29").toDF("x")
  checkAnswer(februaryDf.select(to_date(col("x"))), Seq(dateRow("2016-02-29"), nullRow))
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -1084,6 +1084,9 @@ class VeloxTestSettings extends BackendTestSettings {
.exclude("to_timestamp")
// Legacy mode is not supported, assuming this mode is not commonly used.
.exclude("SPARK-30668: use legacy timestamp parser in to_timestamp")
// Legacy mode is not supported and velox getTimestamp function does not throw
// exception when format is "yyyy-dd-aa".
.exclude("function to_date")
enableSuite[GlutenDeprecatedAPISuite]
enableSuite[GlutenDynamicPartitionPruningV1SuiteAEOff]
enableSuite[GlutenDynamicPartitionPruningV1SuiteAEOn]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -246,4 +246,93 @@ class GlutenDateFunctionsSuite extends DateFunctionsSuite with GlutenSQLTestsTra
}
}
}

testGluten("function to_date") {
  // Three input rows, each carrying a date ("d"), a timestamp ("t") and a string ("s").
  val df = Seq(
    (Date.valueOf("2015-07-22"), Timestamp.valueOf("2015-07-22 10:00:00"), "2015-07-22 10:00:00"),
    (Date.valueOf("2015-07-01"), Timestamp.valueOf("2014-12-31 23:59:59"), "2014-12-31"),
    (Date.valueOf("2014-12-31"), Timestamp.valueOf("2014-12-31 23:59:59"), "2014-31-12")
  ).toDF("d", "t", "s")

  // Small helpers for spelling out the expected single-column rows.
  def dateRow(iso: String): Row = Row(Date.valueOf(iso))
  val nullRow = Row(null)

  val expectedFromTimestamp =
    Seq(dateRow("2015-07-22"), dateRow("2014-12-31"), dateRow("2014-12-31"))
  val expectedFromDate =
    Seq(dateRow("2015-07-22"), dateRow("2015-07-01"), dateRow("2014-12-31"))
  val expectedFromString =
    Seq(dateRow("2015-07-22"), dateRow("2014-12-31"), nullRow)

  // Column API, no explicit format.
  checkAnswer(df.select(to_date(col("t"))), expectedFromTimestamp)
  checkAnswer(df.select(to_date(col("d"))), expectedFromDate)
  checkAnswer(df.select(to_date(col("s"))), expectedFromString)

  // SQL expression API, no explicit format.
  checkAnswer(df.selectExpr("to_date(t)"), expectedFromTimestamp)
  checkAnswer(df.selectExpr("to_date(d)"), expectedFromDate)
  checkAnswer(df.selectExpr("to_date(s)"), expectedFromString)

  // Explicit format on the timestamp and date columns.
  checkAnswer(df.select(to_date(col("t"), "yyyy-MM-dd")), expectedFromTimestamp)
  checkAnswer(df.select(to_date(col("d"), "yyyy-MM-dd")), expectedFromDate)

  // Explicit format on the string column under the "corrected" parser policy.
  withSQLConf(SQLConf.LEGACY_TIME_PARSER_POLICY.key -> "corrected") {
    checkAnswer(
      df.select(to_date(col("s"), "yyyy-MM-dd")),
      Seq(nullRow, dateRow("2014-12-31"), nullRow))
  }
  // Disabled on purpose: legacyParserPolicy is not respected by Gluten.
  // withSQLConf(SQLConf.LEGACY_TIME_PARSER_POLICY.key -> "exception") {
  //   checkExceptionMessage(df.select(to_date(col("s"), "yyyy-MM-dd")))
  // }

  // Day and month swapped in the format.
  checkAnswer(
    df.select(to_date(col("s"), "yyyy-dd-MM")),
    Seq(nullRow, nullRow, dateRow("2014-12-31")))

  // Invalid format: every row is expected to be null.
  checkAnswer(df.select(to_date(col("s"), "yyyy-hh-MM")), Seq(nullRow, nullRow, nullRow))
  // Disabled on purpose: velox getTimestamp function does not throw exception when
  // format is "yyyy-dd-aa".
  // val e =
  //   intercept[SparkUpgradeException](df.select(to_date(col("s"), "yyyy-dd-aa")).collect())
  // assert(e.getCause.isInstanceOf[IllegalArgumentException])
  // assert(
  //   e.getMessage.contains("You may get a different result due to the upgrading to Spark"))

  // February: 2016-02-29 is expected to parse, 2017-02-29 is expected to be null.
  val februaryDf = Seq("2016-02-29", "2017-02-29").toDF("x")
  checkAnswer(februaryDf.select(to_date(col("x"))), Seq(dateRow("2016-02-29"), nullRow))
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -1101,6 +1101,9 @@ class VeloxTestSettings extends BackendTestSettings {
.exclude("to_timestamp")
// Legacy mode is not supported, assuming this mode is not commonly used.
.exclude("SPARK-30668: use legacy timestamp parser in to_timestamp")
// Legacy mode is not supported and velox getTimestamp function does not throw
// exception when format is "yyyy-dd-aa".
.exclude("function to_date")
enableSuite[GlutenDeprecatedAPISuite]
enableSuite[GlutenDynamicPartitionPruningV1SuiteAEOff]
enableSuite[GlutenDynamicPartitionPruningV1SuiteAEOn]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -246,4 +246,93 @@ class GlutenDateFunctionsSuite extends DateFunctionsSuite with GlutenSQLTestsTra
}
}
}

testGluten("function to_date") {
  // Three input rows, each carrying a date ("d"), a timestamp ("t") and a string ("s").
  val df = Seq(
    (Date.valueOf("2015-07-22"), Timestamp.valueOf("2015-07-22 10:00:00"), "2015-07-22 10:00:00"),
    (Date.valueOf("2015-07-01"), Timestamp.valueOf("2014-12-31 23:59:59"), "2014-12-31"),
    (Date.valueOf("2014-12-31"), Timestamp.valueOf("2014-12-31 23:59:59"), "2014-31-12")
  ).toDF("d", "t", "s")

  // Small helpers for spelling out the expected single-column rows.
  def dateRow(iso: String): Row = Row(Date.valueOf(iso))
  val nullRow = Row(null)

  val expectedFromTimestamp =
    Seq(dateRow("2015-07-22"), dateRow("2014-12-31"), dateRow("2014-12-31"))
  val expectedFromDate =
    Seq(dateRow("2015-07-22"), dateRow("2015-07-01"), dateRow("2014-12-31"))
  val expectedFromString =
    Seq(dateRow("2015-07-22"), dateRow("2014-12-31"), nullRow)

  // Column API, no explicit format.
  checkAnswer(df.select(to_date(col("t"))), expectedFromTimestamp)
  checkAnswer(df.select(to_date(col("d"))), expectedFromDate)
  checkAnswer(df.select(to_date(col("s"))), expectedFromString)

  // SQL expression API, no explicit format.
  checkAnswer(df.selectExpr("to_date(t)"), expectedFromTimestamp)
  checkAnswer(df.selectExpr("to_date(d)"), expectedFromDate)
  checkAnswer(df.selectExpr("to_date(s)"), expectedFromString)

  // Explicit format on the timestamp and date columns.
  checkAnswer(df.select(to_date(col("t"), "yyyy-MM-dd")), expectedFromTimestamp)
  checkAnswer(df.select(to_date(col("d"), "yyyy-MM-dd")), expectedFromDate)

  // Explicit format on the string column under the "corrected" parser policy.
  withSQLConf(SQLConf.LEGACY_TIME_PARSER_POLICY.key -> "corrected") {
    checkAnswer(
      df.select(to_date(col("s"), "yyyy-MM-dd")),
      Seq(nullRow, dateRow("2014-12-31"), nullRow))
  }
  // Disabled on purpose: legacyParserPolicy is not respected by Gluten.
  // withSQLConf(SQLConf.LEGACY_TIME_PARSER_POLICY.key -> "exception") {
  //   checkExceptionMessage(df.select(to_date(col("s"), "yyyy-MM-dd")))
  // }

  // Day and month swapped in the format.
  checkAnswer(
    df.select(to_date(col("s"), "yyyy-dd-MM")),
    Seq(nullRow, nullRow, dateRow("2014-12-31")))

  // Invalid format: every row is expected to be null.
  checkAnswer(df.select(to_date(col("s"), "yyyy-hh-MM")), Seq(nullRow, nullRow, nullRow))
  // Disabled on purpose: velox getTimestamp function does not throw exception when
  // format is "yyyy-dd-aa".
  // val e =
  //   intercept[SparkUpgradeException](df.select(to_date(col("s"), "yyyy-dd-aa")).collect())
  // assert(e.getCause.isInstanceOf[IllegalArgumentException])
  // assert(
  //   e.getMessage.contains("You may get a different result due to the upgrading to Spark"))

  // February: 2016-02-29 is expected to parse, 2017-02-29 is expected to be null.
  val februaryDf = Seq("2016-02-29", "2017-02-29").toDF("x")
  checkAnswer(februaryDf.select(to_date(col("x"))), Seq(dateRow("2016-02-29"), nullRow))
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -40,15 +40,12 @@ class GlutenCastSuite extends CastSuiteBase with GlutenTestsTrait {

testGluten("missing cases - from boolean") {
  // For every type in DataTypeTestUtils.numericTypeWithoutDecimal plus BooleanType,
  // cast a boolean literal to that type and check the evaluated result:
  //   - BooleanType target: the value is expected to come back unchanged;
  //   - any other target:   true is expected to become 1 and false to become 0.
  // The cases are written as a pattern-matching anonymous function passed straight
  // to foreach, so there is exactly one set of checks per target type.
  (DataTypeTestUtils.numericTypeWithoutDecimal + BooleanType).foreach {
    case t @ BooleanType =>
      checkEvaluation(cast(cast(true, BooleanType), t), true)
      checkEvaluation(cast(cast(false, BooleanType), t), false)
    case t =>
      checkEvaluation(cast(cast(true, BooleanType), t), 1)
      checkEvaluation(cast(cast(false, BooleanType), t), 0)
  }
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1123,6 +1123,9 @@ class VeloxTestSettings extends BackendTestSettings {
.exclude("to_timestamp")
// Legacy mode is not supported, assuming this mode is not commonly used.
.exclude("SPARK-30668: use legacy timestamp parser in to_timestamp")
// Legacy mode is not supported and velox getTimestamp function does not throw
// exception when format is "yyyy-dd-aa".
.exclude("function to_date")
enableSuite[GlutenDeprecatedAPISuite]
enableSuite[GlutenDynamicPartitionPruningV1SuiteAEOff]
enableSuite[GlutenDynamicPartitionPruningV1SuiteAEOn]
Expand Down
Loading

0 comments on commit 36f0a8f

Please sign in to comment.