From 5180b68dc75dbc802f7e4f5538f4acd220fb6d1e Mon Sep 17 00:00:00 2001 From: rexan Date: Mon, 28 Oct 2024 09:02:36 -0700 Subject: [PATCH] Add Spark CAST(integral as timestamp) (#11089) Summary: Add Spark CAST (integral as timestamp). The input value is treated as the number of seconds since the epoch (1970-01-01 00:00:00 UTC). Supported types are tinyint, smallint, integer and bigint. Spark's implementation: https://github.com/apache/spark/blob/v3.5.1/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala#L680 Pull Request resolved: https://github.com/facebookincubator/velox/pull/11089 Reviewed By: amitkdutta Differential Revision: D64805077 Pulled By: pedroerp fbshipit-source-id: 0a551641b17540c92fb602b8a0f3524d574cb03e --- velox/docs/functions/spark/conversion.rst | 20 ++++++++ velox/expression/CastExpr-inl.h | 10 ++++ velox/expression/CastHooks.h | 2 + velox/expression/PrestoCastHooks.cpp | 5 ++ velox/expression/PrestoCastHooks.h | 2 + .../sparksql/specialforms/SparkCastHooks.cpp | 15 ++++++ .../sparksql/specialforms/SparkCastHooks.h | 4 ++ .../sparksql/tests/SparkCastExprTest.cpp | 46 +++++++++++++++++++ 8 files changed, 104 insertions(+) diff --git a/velox/docs/functions/spark/conversion.rst b/velox/docs/functions/spark/conversion.rst index 037e3baebf5a..51bc63d0843a 100644 --- a/velox/docs/functions/spark/conversion.rst +++ b/velox/docs/functions/spark/conversion.rst @@ -239,3 +239,23 @@ Valid example SELECT cast(cast(180 as smallint) as binary); -- [00 B4] SELECT cast(cast(180000 as integer) as binary); -- [00 02 BF 20] SELECT cast(cast(180000 as bigint) as binary); -- [00 00 00 00 00 02 BF 20] + +Cast to Timestamp +----------------- + +From integral types +^^^^^^^^^^^^^^^^^^^ + +Casting integral value to timestamp type is allowed. +The input value is treated as the number of seconds since the epoch (1970-01-01 00:00:00 UTC). +Supported types are tinyint, smallint, integer and bigint. 
+ +Valid example + +:: + + SELECT cast(0 as timestamp); -- 1970-01-01 00:00:00 + SELECT cast(1727181032 as timestamp); -- 2024-09-24 12:30:32 + SELECT cast(9223372036855 as timestamp); -- 294247-01-10 04:00:54.775807 + SELECT cast(-9223372036855 as timestamp); -- -290308-12-21 19:59:05.224192 + \ No newline at end of file diff --git a/velox/expression/CastExpr-inl.h b/velox/expression/CastExpr-inl.h index e767c5277cff..f660b992fca7 100644 --- a/velox/expression/CastExpr-inl.h +++ b/velox/expression/CastExpr-inl.h @@ -276,6 +276,16 @@ void CastExpr::applyCastKernel( try { auto inputRowValue = input->valueAt(row); + if constexpr ( + (FromKind == TypeKind::TINYINT || FromKind == TypeKind::SMALLINT || + FromKind == TypeKind::INTEGER || FromKind == TypeKind::BIGINT) && + ToKind == TypeKind::TIMESTAMP) { + const auto castResult = + hooks_->castIntToTimestamp((int64_t)inputRowValue); + setResultOrError(castResult, row); + return; + } + // Optimize empty input strings casting by avoiding throwing exceptions. 
if constexpr ( FromKind == TypeKind::VARCHAR || FromKind == TypeKind::VARBINARY) { diff --git a/velox/expression/CastHooks.h b/velox/expression/CastHooks.h index dfd88ce55f47..85231846e346 100644 --- a/velox/expression/CastHooks.h +++ b/velox/expression/CastHooks.h @@ -35,6 +35,8 @@ class CastHooks { virtual Expected castStringToTimestamp( const StringView& view) const = 0; + virtual Expected castIntToTimestamp(int64_t seconds) const = 0; + virtual Expected castStringToDate( const StringView& dateString) const = 0; diff --git a/velox/expression/PrestoCastHooks.cpp b/velox/expression/PrestoCastHooks.cpp index 4876ef31aa3c..9a2b64e5a70a 100644 --- a/velox/expression/PrestoCastHooks.cpp +++ b/velox/expression/PrestoCastHooks.cpp @@ -67,6 +67,11 @@ Expected PrestoCastHooks::castStringToTimestamp( return result.first; } +Expected PrestoCastHooks::castIntToTimestamp(int64_t seconds) const { + return folly::makeUnexpected( + Status::UserError("Conversion to Timestamp is not supported")); +} + Expected PrestoCastHooks::castStringToDate( const StringView& dateString) const { // Cast from string to date allows only complete ISO 8601 formatted strings: diff --git a/velox/expression/PrestoCastHooks.h b/velox/expression/PrestoCastHooks.h index af129557a955..a0a1fce7491f 100644 --- a/velox/expression/PrestoCastHooks.h +++ b/velox/expression/PrestoCastHooks.h @@ -30,6 +30,8 @@ class PrestoCastHooks : public CastHooks { Expected castStringToTimestamp( const StringView& view) const override; + Expected castIntToTimestamp(int64_t seconds) const override; + // Uses standard cast mode to cast from string to date. 
Expected castStringToDate( const StringView& dateString) const override; diff --git a/velox/functions/sparksql/specialforms/SparkCastHooks.cpp b/velox/functions/sparksql/specialforms/SparkCastHooks.cpp index 061ccc46198d..fcab616e1b07 100644 --- a/velox/functions/sparksql/specialforms/SparkCastHooks.cpp +++ b/velox/functions/sparksql/specialforms/SparkCastHooks.cpp @@ -26,6 +26,21 @@ Expected SparkCastHooks::castStringToTimestamp( view.data(), view.size(), util::TimestampParseMode::kSparkCast); } +Expected SparkCastHooks::castIntToTimestamp(int64_t seconds) const { + // Spark internally uses microsecond precision for timestamps. + // To avoid overflow, we need to check the range of seconds. + static constexpr int64_t maxSeconds = std::numeric_limits::max() / + (Timestamp::kMicrosecondsInMillisecond * + Timestamp::kMillisecondsInSecond); + if (seconds > maxSeconds) { + return Timestamp::fromMicrosNoError(std::numeric_limits::max()); + } + if (seconds < -maxSeconds) { + return Timestamp::fromMicrosNoError(std::numeric_limits::min()); + } + return Timestamp(seconds, 0); +} + Expected SparkCastHooks::castStringToDate( const StringView& dateString) const { // Allows all patterns supported by Spark: diff --git a/velox/functions/sparksql/specialforms/SparkCastHooks.h b/velox/functions/sparksql/specialforms/SparkCastHooks.h index 61e52916e5f0..c7d298a0ba4e 100644 --- a/velox/functions/sparksql/specialforms/SparkCastHooks.h +++ b/velox/functions/sparksql/specialforms/SparkCastHooks.h @@ -27,6 +27,10 @@ class SparkCastHooks : public exec::CastHooks { Expected castStringToTimestamp( const StringView& view) const override; + /// When casting integral value as timestamp, the input is treated as the + /// number of seconds since the epoch (1970-01-01 00:00:00 UTC). + Expected castIntToTimestamp(int64_t seconds) const override; + /// 1) Removes all leading and trailing UTF8 white-spaces before cast. 2) Uses /// non-standard cast mode to cast from string to date. 
Expected castStringToDate( diff --git a/velox/functions/sparksql/tests/SparkCastExprTest.cpp b/velox/functions/sparksql/tests/SparkCastExprTest.cpp index ba62ff4d6549..800710d053b8 100644 --- a/velox/functions/sparksql/tests/SparkCastExprTest.cpp +++ b/velox/functions/sparksql/tests/SparkCastExprTest.cpp @@ -91,6 +91,24 @@ class SparkCastExprTest : public functions::test::CastBaseTest { 72, std::nullopt})); } + + template + void testIntegralToTimestampCast() { + testCast( + makeNullableFlatVector({ + 0, + 1, + std::numeric_limits::max(), + std::numeric_limits::min(), + std::nullopt, + }), + makeNullableFlatVector( + {Timestamp(0, 0), + Timestamp(1, 0), + Timestamp(std::numeric_limits::max(), 0), + Timestamp(std::numeric_limits::min(), 0), + std::nullopt})); + } }; TEST_F(SparkCastExprTest, date) { @@ -245,6 +263,34 @@ TEST_F(SparkCastExprTest, stringToTimestamp) { testCast("timestamp", input, expected); } +TEST_F(SparkCastExprTest, intToTimestamp) { + // Cast bigint as timestamp. + testCast( + makeNullableFlatVector({ + 0, + 1727181032, + -1727181032, + 9223372036855, + -9223372036856, + std::numeric_limits::max(), + std::numeric_limits::min(), + }), + makeNullableFlatVector({ + Timestamp(0, 0), + Timestamp(1727181032, 0), + Timestamp(-1727181032, 0), + Timestamp(9223372036854, 775'807'000), + Timestamp(-9223372036855, 224'192'000), + Timestamp(9223372036854, 775'807'000), + Timestamp(-9223372036855, 224'192'000), + })); + + // Cast tinyint/smallint/integer as timestamp. + testIntegralToTimestampCast(); + testIntegralToTimestampCast(); + testIntegralToTimestampCast(); +} + TEST_F(SparkCastExprTest, primitiveInvalidCornerCases) { // To integer. {