Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add timestamp_micros, timestamp_millis, unix_micros, unix_millis Spark functions #9448

Closed
wants to merge 7 commits into from
Closed
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 27 additions & 0 deletions velox/docs/functions/spark/datetime.rst
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,20 @@ These functions support TIMESTAMP and DATE input types.

SELECT second('2009-07-30 12:58:59'); -- 59

.. spark:function:: timestamp_micros(x) -> timestamp

Returns a timestamp constructed from the number of microseconds since the UTC epoch (1970-01-01 00:00:00 UTC).
Supported types are: TINYINT, SMALLINT, INTEGER and BIGINT.::

SELECT timestamp_micros(1230219000123123); -- '2008-12-25 15:30:00.123123'

.. spark:function:: timestamp_millis(x) -> timestamp

Returns a timestamp constructed from the number of milliseconds since the UTC epoch (1970-01-01 00:00:00 UTC).
Supported types are: TINYINT, SMALLINT, INTEGER and BIGINT.::

SELECT timestamp_millis(1230219000123); -- '2008-12-25 15:30:00.123'

.. spark:function:: to_unix_timestamp(string) -> integer

Alias for ``unix_timestamp(string) -> integer``.
Expand All @@ -244,6 +258,19 @@ These functions support TIMESTAMP and DATE input types.
SELECT unix_date('1970-01-02'); -- '1'
SELECT unix_date('1969-12-31'); -- '-1'

.. spark:function:: unix_micros(timestamp) -> bigint

Returns the number of microseconds since 1970-01-01 00:00:00 UTC.::

SELECT unix_micros('1970-01-01 00:00:01'); -- 1000000

.. spark:function:: unix_millis(timestamp) -> bigint

Returns the number of milliseconds since 1970-01-01 00:00:00 UTC. Precision
finer than a millisecond is truncated.::

SELECT unix_millis('1970-01-01 00:00:01'); -- 1000

.. spark:function:: unix_timestamp() -> integer

Returns the current UNIX timestamp in seconds.
Expand Down
8 changes: 8 additions & 0 deletions velox/functions/lib/RegistrationHelpers.h
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,14 @@ void registerUnaryIntegral(const std::vector<std::string>& aliases) {
registerFunction<T, int64_t, int64_t>(aliases);
}

/// Registers the unary function template T under every name in 'aliases',
/// once for each integral input type (int8_t, int16_t, int32_t, int64_t).
/// Every registered signature uses TReturn as its return type, regardless of
/// the input width.
///
/// @tparam T Function template to register.
/// @tparam TReturn Return type shared by all four signatures.
/// @param aliases Names under which each signature is registered.
template <template <class> class T, typename TReturn>
void registerUnaryIntegralWithTReturn(const std::vector<std::string>& aliases) {
registerFunction<T, TReturn, int8_t>(aliases);
registerFunction<T, TReturn, int16_t>(aliases);
registerFunction<T, TReturn, int32_t>(aliases);
registerFunction<T, TReturn, int64_t>(aliases);
}

template <template <class> class T>
void registerUnaryFloatingPoint(const std::vector<std::string>& aliases) {
registerFunction<T, double, double>(aliases);
Expand Down
47 changes: 47 additions & 0 deletions velox/functions/sparksql/DateTimeFunctions.h
Original file line number Diff line number Diff line change
Expand Up @@ -760,4 +760,51 @@ struct MakeYMIntervalFunction {
result = totalMonths;
}
};

template <typename T>
Yuhta marked this conversation as resolved.
Show resolved Hide resolved
struct TimestampToMicrosFunction {
VELOX_DEFINE_FUNCTION_TYPES(T);

FOLLY_ALWAYS_INLINE void call(
int64_t& result,
const arg_type<Timestamp>& timestamp) {
result = timestamp.toMicros();
}
};

template <typename T>
struct MicrosToTimestampFunction {
VELOX_DEFINE_FUNCTION_TYPES(T);

template <typename TInput>
zhli1142015 marked this conversation as resolved.
Show resolved Hide resolved
FOLLY_ALWAYS_INLINE void call(
out_type<Timestamp>& result,
const TInput micros) {
zhli1142015 marked this conversation as resolved.
Show resolved Hide resolved
result = Timestamp::fromMicrosNoError(micros);
}
};

/// Simple function that converts a TIMESTAMP argument to a BIGINT by
/// delegating to Timestamp::toMillis().
template <typename T>
struct TimestampToMillisFunction {
  VELOX_DEFINE_FUNCTION_TYPES(T);

  /// @param result Receives the value produced by timestamp.toMillis().
  /// @param timestamp Input timestamp.
  FOLLY_ALWAYS_INLINE void call(
      int64_t& result,
      const arg_type<Timestamp>& timestamp) {
    const auto millis = timestamp.toMillis();
    result = millis;
  }
};

/// Simple function that builds a TIMESTAMP from a count of milliseconds by
/// delegating to Timestamp::fromMillisNoError().
template <typename T>
struct MillisToTimestampFunction {
  VELOX_DEFINE_FUNCTION_TYPES(T);

  /// @tparam TInput Type of the millisecond count; the conversion to the
  /// parameter type of Timestamp::fromMillisNoError() happens at the call.
  /// @param result Receives the constructed timestamp.
  /// @param millis Millisecond count to convert.
  template <typename TInput>
  FOLLY_ALWAYS_INLINE void call(
      out_type<Timestamp>& result,
      const TInput millis) {
    const auto converted = Timestamp::fromMillisNoError(millis);
    result = converted;
  }
};

} // namespace facebook::velox::functions::sparksql
9 changes: 9 additions & 0 deletions velox/functions/sparksql/Register.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -391,6 +391,15 @@ void registerFunctions(const std::string& prefix) {

VELOX_REGISTER_VECTOR_FUNCTION(udf_make_timestamp, prefix + "make_timestamp");

registerFunction<TimestampToMicrosFunction, int64_t, Timestamp>(
{prefix + "unix_micros"});
registerUnaryIntegralWithTReturn<MicrosToTimestampFunction, Timestamp>(
{prefix + "timestamp_micros"});
registerFunction<TimestampToMillisFunction, int64_t, Timestamp>(
{prefix + "unix_millis"});
registerUnaryIntegralWithTReturn<MillisToTimestampFunction, Timestamp>(
{prefix + "timestamp_millis"});

// Register bloom filter function
registerFunction<BloomFilterMightContainFunction, bool, Varbinary, int64_t>(
{prefix + "might_contain"});
Expand Down
113 changes: 113 additions & 0 deletions velox/functions/sparksql/tests/DateTimeFunctionsTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@ class DateTimeFunctionsTest : public SparkFunctionBaseTest {
static constexpr int16_t kMaxSmallint = std::numeric_limits<int16_t>::max();
static constexpr int8_t kMinTinyint = std::numeric_limits<int8_t>::min();
static constexpr int8_t kMaxTinyint = std::numeric_limits<int8_t>::max();
static constexpr int64_t kMinBigint = std::numeric_limits<int64_t>::min();
static constexpr int64_t kMaxBigint = std::numeric_limits<int64_t>::max();

protected:
void setQueryTimeZone(const std::string& timeZone) {
Expand Down Expand Up @@ -983,5 +985,116 @@ TEST_F(DateTimeFunctionsTest, yearOfWeek) {
EXPECT_EQ(2006, yearOfWeek(parseDate("2006-01-02")));
}

// Verifies timestamp_micros for typical values and for the limits of each
// integer width.
TEST_F(DateTimeFunctionsTest, microsToTimestamp) {
  // Generic over the argument's C++ type so that each call is evaluated with
  // an input column of that exact integer type. A helper taking int64_t would
  // widen every constant first and only ever exercise the BIGINT signature.
  const auto microsToTimestamp = [&](auto micros) {
    using TInput = decltype(micros);
    return evaluateOnce<Timestamp, TInput>("timestamp_micros(c0)", micros);
  };

  // Plain literals are pinned to int64_t explicitly.
  EXPECT_EQ(
      microsToTimestamp(int64_t{1000000}),
      util::fromTimestampString("1970-01-01 00:00:01"));
  EXPECT_EQ(
      microsToTimestamp(int64_t{1230219000123123}),
      util::fromTimestampString("2008-12-25 15:30:00.123123"));

  // Limits of int8_t.
  EXPECT_EQ(
      microsToTimestamp(kMaxTinyint),
      util::fromTimestampString("1970-01-01 00:00:00.000127"));
  EXPECT_EQ(
      microsToTimestamp(kMinTinyint),
      util::fromTimestampString("1969-12-31 23:59:59.999872"));

  // Limits of int16_t.
  EXPECT_EQ(
      microsToTimestamp(kMaxSmallint),
      util::fromTimestampString("1970-01-01 00:00:00.032767"));
  EXPECT_EQ(
      microsToTimestamp(kMinSmallint),
      util::fromTimestampString("1969-12-31 23:59:59.967232"));

  // NOTE(review): kMax / kMin are presumably the int32_t limits, judging by
  // the expected values - confirm against the fixture's declarations.
  EXPECT_EQ(
      microsToTimestamp(kMax),
      util::fromTimestampString("1970-01-01 00:35:47.483647"));
  EXPECT_EQ(
      microsToTimestamp(kMin),
      util::fromTimestampString("1969-12-31 23:24:12.516352"));

  // Limits of int64_t.
  EXPECT_EQ(
      microsToTimestamp(kMaxBigint),
      util::fromTimestampString("294247-01-10 04:00:54.775807"));
  EXPECT_EQ(
      microsToTimestamp(kMinBigint),
      util::fromTimestampString("-290308-12-21 19:59:05.224192"));
}

// Verifies timestamp_millis for typical values and for the limits of each
// integer width.
TEST_F(DateTimeFunctionsTest, millisToTimestamp) {
  // Generic over the argument's C++ type so that each call is evaluated with
  // an input column of that exact integer type. A helper taking int64_t would
  // widen every constant first and only ever exercise the BIGINT signature.
  const auto millisToTimestamp = [&](auto millis) {
    using TInput = decltype(millis);
    return evaluateOnce<Timestamp, TInput>("timestamp_millis(c0)", millis);
  };

  // Plain literals are pinned to int64_t explicitly.
  EXPECT_EQ(
      millisToTimestamp(int64_t{1000}),
      util::fromTimestampString("1970-01-01 00:00:01"));
  EXPECT_EQ(
      millisToTimestamp(int64_t{1230219000123}),
      util::fromTimestampString("2008-12-25 15:30:00.123"));

  // Limits of int8_t.
  EXPECT_EQ(
      millisToTimestamp(kMaxTinyint),
      util::fromTimestampString("1970-01-01 00:00:00.127"));
  EXPECT_EQ(
      millisToTimestamp(kMinTinyint),
      util::fromTimestampString("1969-12-31 23:59:59.872"));

  // Limits of int16_t.
  EXPECT_EQ(
      millisToTimestamp(kMaxSmallint),
      util::fromTimestampString("1970-01-01 00:00:32.767"));
  EXPECT_EQ(
      millisToTimestamp(kMinSmallint),
      util::fromTimestampString("1969-12-31 23:59:27.232"));

  // NOTE(review): kMax / kMin are presumably the int32_t limits, judging by
  // the expected values - confirm against the fixture's declarations.
  EXPECT_EQ(
      millisToTimestamp(kMax),
      util::fromTimestampString("1970-01-25 20:31:23.647"));
  EXPECT_EQ(
      millisToTimestamp(kMin),
      util::fromTimestampString("1969-12-07 03:28:36.352"));

  // Limits of int64_t.
  EXPECT_EQ(
      millisToTimestamp(kMaxBigint),
      util::fromTimestampString("292278994-08-17T07:12:55.807"));
  EXPECT_EQ(
      millisToTimestamp(kMinBigint),
      util::fromTimestampString("-292275055-05-16T16:47:04.192"));
}

// Verifies unix_micros against timestamps whose microsecond counts sit at
// typical values and at integer-width limits.
TEST_F(DateTimeFunctionsTest, timestampToMicros) {
  // Parses the textual timestamp first, then evaluates unix_micros on it.
  const auto unixMicros = [&](const StringView text) {
    const auto input = util::fromTimestampString(text);
    return evaluateOnce<int64_t, Timestamp>("unix_micros(c0)", input);
  };

  EXPECT_EQ(unixMicros("1970-01-01 00:00:01"), 1000000);
  EXPECT_EQ(unixMicros("2008-12-25 15:30:00.123123"), 1230219000123123);

  // Results equal to the int8_t and int16_t limits.
  EXPECT_EQ(unixMicros("1970-01-01 00:00:00.000127"), kMaxTinyint);
  EXPECT_EQ(unixMicros("1969-12-31 23:59:59.999872"), kMinTinyint);
  EXPECT_EQ(unixMicros("1970-01-01 00:00:00.032767"), kMaxSmallint);
  EXPECT_EQ(unixMicros("1969-12-31 23:59:59.967232"), kMinSmallint);

  EXPECT_EQ(unixMicros("1970-01-01 00:35:47.483647"), kMax);
  EXPECT_EQ(unixMicros("1969-12-31 23:24:12.516352"), kMin);

  // Upper int64_t limit, and a value 1'000'000 microseconds above the lower
  // limit.
  // NOTE(review): the exact lower limit is not checked here, presumably
  // because the conversion cannot produce it - confirm.
  EXPECT_EQ(unixMicros("294247-01-10 04:00:54.775807"), kMaxBigint);
  EXPECT_EQ(
      unixMicros("-290308-12-21 19:59:06.224192"), kMinBigint + 1000000);
}

// Verifies unix_millis against timestamps whose millisecond counts sit at
// typical values and at integer-width limits.
TEST_F(DateTimeFunctionsTest, timestampToMillis) {
  // Parses the textual timestamp first, then evaluates unix_millis on it.
  const auto unixMillis = [&](const StringView text) {
    const auto input = util::fromTimestampString(text);
    return evaluateOnce<int64_t, Timestamp>("unix_millis(c0)", input);
  };

  EXPECT_EQ(unixMillis("1970-01-01 00:00:01"), 1000);
  EXPECT_EQ(unixMillis("2008-12-25 15:30:00.123"), 1230219000123);

  // Results equal to the int8_t and int16_t limits.
  EXPECT_EQ(unixMillis("1970-01-01 00:00:00.127"), kMaxTinyint);
  EXPECT_EQ(unixMillis("1969-12-31 23:59:59.872"), kMinTinyint);
  EXPECT_EQ(unixMillis("1970-01-01 00:00:32.767"), kMaxSmallint);
  EXPECT_EQ(unixMillis("1969-12-31 23:59:27.232"), kMinSmallint);

  EXPECT_EQ(unixMillis("1970-01-25 20:31:23.647"), kMax);
  EXPECT_EQ(unixMillis("1969-12-07 03:28:36.352"), kMin);

  // Results equal to the int64_t limits.
  EXPECT_EQ(unixMillis("292278994-08-17T07:12:55.807"), kMaxBigint);
  EXPECT_EQ(unixMillis("-292275055-05-16T16:47:04.192"), kMinBigint);
}

} // namespace
} // namespace facebook::velox::functions::sparksql::test
15 changes: 15 additions & 0 deletions velox/type/Timestamp.h
Original file line number Diff line number Diff line change
Expand Up @@ -224,6 +224,21 @@ struct Timestamp {
return Timestamp(second, nano);
}

/// Builds a Timestamp from a count of microseconds.
///
/// The count is split into whole seconds and a nanosecond remainder. For
/// negative counts that are not an exact multiple of one second, the seconds
/// part is rounded toward negative infinity so that the nanosecond part is
/// never negative.
///
/// Every intermediate value fits in int64_t for every possible input, so the
/// computation does not rely on signed-overflow wrap-around and needs no
/// sanitizer suppression.
///
/// @param micros Microsecond count; any int64_t value is accepted.
/// @return Timestamp constructed from the resulting (seconds, nanos) pair.
static Timestamp fromMicrosNoError(int64_t micros) {
  constexpr int64_t kMicrosInSecond = 1'000'000;
  constexpr int64_t kNanosInMicro = 1'000;

  // Integer division truncates toward zero and the remainder takes the sign
  // of the dividend, so 'remainder' is negative exactly when 'micros' is a
  // negative value that is not a whole number of seconds.
  const int64_t wholeSeconds = micros / kMicrosInSecond;
  const int64_t remainder = micros % kMicrosInSecond;
  if (remainder >= 0) {
    return Timestamp(wholeSeconds, remainder * kNanosInMicro);
  }

  // Borrow one second so the fractional part becomes positive. 'remainder'
  // lies in (-1'000'000, 0), so the sum lies in (0, 1'000'000).
  return Timestamp(
      wholeSeconds - 1, (remainder + kMicrosInSecond) * kNanosInMicro);
}

static Timestamp fromNanos(int64_t nanos) {
if (nanos >= 0 || nanos % 1'000'000'000 == 0) {
return Timestamp(nanos / 1'000'000'000, nanos % 1'000'000'000);
Expand Down
Loading