Skip to content

Commit

Permalink
Change input for to_timestamp function to be seconds rather than na…
Browse files Browse the repository at this point in the history
…noseconds, add `to_timestamp_nanos` (#7844)

* Change input for `to_timestamp` function

* docs

* fix examples

* output `to_timestamp` signature as ns
  • Loading branch information
comphead authored Oct 26, 2023
1 parent 0911f15 commit 12b473b
Show file tree
Hide file tree
Showing 14 changed files with 130 additions and 32 deletions.
18 changes: 9 additions & 9 deletions datafusion/core/tests/sql/expr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -639,7 +639,7 @@ async fn test_uuid_expression() -> Result<()> {
async fn test_extract_date_part() -> Result<()> {
test_expression!("date_part('YEAR', CAST('2000-01-01' AS DATE))", "2000.0");
test_expression!(
"EXTRACT(year FROM to_timestamp('2020-09-08T12:00:00+00:00'))",
"EXTRACT(year FROM timestamp '2020-09-08T12:00:00+00:00')",
"2020.0"
);
test_expression!("date_part('QUARTER', CAST('2000-01-01' AS DATE))", "1.0");
Expand Down Expand Up @@ -686,35 +686,35 @@ async fn test_extract_date_part() -> Result<()> {
"12.0"
);
test_expression!(
"EXTRACT(second FROM to_timestamp('2020-09-08T12:00:12.12345678+00:00'))",
"EXTRACT(second FROM timestamp '2020-09-08T12:00:12.12345678+00:00')",
"12.12345678"
);
test_expression!(
"EXTRACT(millisecond FROM to_timestamp('2020-09-08T12:00:12.12345678+00:00'))",
"EXTRACT(millisecond FROM timestamp '2020-09-08T12:00:12.12345678+00:00')",
"12123.45678"
);
test_expression!(
"EXTRACT(microsecond FROM to_timestamp('2020-09-08T12:00:12.12345678+00:00'))",
"EXTRACT(microsecond FROM timestamp '2020-09-08T12:00:12.12345678+00:00')",
"12123456.78"
);
test_expression!(
"EXTRACT(nanosecond FROM to_timestamp('2020-09-08T12:00:12.12345678+00:00'))",
"EXTRACT(nanosecond FROM timestamp '2020-09-08T12:00:12.12345678+00:00')",
"1.212345678e10"
);
test_expression!(
"date_part('second', to_timestamp('2020-09-08T12:00:12.12345678+00:00'))",
"date_part('second', timestamp '2020-09-08T12:00:12.12345678+00:00')",
"12.12345678"
);
test_expression!(
"date_part('millisecond', to_timestamp('2020-09-08T12:00:12.12345678+00:00'))",
"date_part('millisecond', timestamp '2020-09-08T12:00:12.12345678+00:00')",
"12123.45678"
);
test_expression!(
"date_part('microsecond', to_timestamp('2020-09-08T12:00:12.12345678+00:00'))",
"date_part('microsecond', timestamp '2020-09-08T12:00:12.12345678+00:00')",
"12123456.78"
);
test_expression!(
"date_part('nanosecond', to_timestamp('2020-09-08T12:00:12.12345678+00:00'))",
"date_part('nanosecond', timestamp '2020-09-08T12:00:12.12345678+00:00')",
"1.212345678e10"
);

Expand Down
2 changes: 1 addition & 1 deletion datafusion/core/tests/sql/timestamp.rs
Original file line number Diff line number Diff line change
Expand Up @@ -742,7 +742,7 @@ async fn test_arrow_typeof() -> Result<()> {
"+-----------------------------------------------------------------------+",
"| arrow_typeof(date_trunc(Utf8(\"microsecond\"),to_timestamp(Int64(61)))) |",
"+-----------------------------------------------------------------------+",
"| Timestamp(Nanosecond, None) |",
"| Timestamp(Second, None) |",
"+-----------------------------------------------------------------------+",
];
assert_batches_eq!(expected, &actual);
Expand Down
17 changes: 17 additions & 0 deletions datafusion/expr/src/built_in_function.rs
Original file line number Diff line number Diff line change
Expand Up @@ -266,6 +266,8 @@ pub enum BuiltinScalarFunction {
ToTimestampMillis,
/// to_timestamp_micros
ToTimestampMicros,
/// to_timestamp_nanos
ToTimestampNanos,
/// to_timestamp_seconds
ToTimestampSeconds,
/// from_unixtime
Expand Down Expand Up @@ -444,6 +446,7 @@ impl BuiltinScalarFunction {
BuiltinScalarFunction::ToTimestamp => Volatility::Immutable,
BuiltinScalarFunction::ToTimestampMillis => Volatility::Immutable,
BuiltinScalarFunction::ToTimestampMicros => Volatility::Immutable,
BuiltinScalarFunction::ToTimestampNanos => Volatility::Immutable,
BuiltinScalarFunction::ToTimestampSeconds => Volatility::Immutable,
BuiltinScalarFunction::Translate => Volatility::Immutable,
BuiltinScalarFunction::Trim => Volatility::Immutable,
Expand Down Expand Up @@ -755,6 +758,7 @@ impl BuiltinScalarFunction {
BuiltinScalarFunction::ToTimestamp => Ok(Timestamp(Nanosecond, None)),
BuiltinScalarFunction::ToTimestampMillis => Ok(Timestamp(Millisecond, None)),
BuiltinScalarFunction::ToTimestampMicros => Ok(Timestamp(Microsecond, None)),
BuiltinScalarFunction::ToTimestampNanos => Ok(Timestamp(Nanosecond, None)),
BuiltinScalarFunction::ToTimestampSeconds => Ok(Timestamp(Second, None)),
BuiltinScalarFunction::FromUnixtime => Ok(Timestamp(Second, None)),
BuiltinScalarFunction::Now => {
Expand Down Expand Up @@ -995,6 +999,18 @@ impl BuiltinScalarFunction {
],
self.volatility(),
),
BuiltinScalarFunction::ToTimestampNanos => Signature::uniform(
1,
vec![
Int64,
Timestamp(Nanosecond, None),
Timestamp(Microsecond, None),
Timestamp(Millisecond, None),
Timestamp(Second, None),
Utf8,
],
self.volatility(),
),
BuiltinScalarFunction::ToTimestampSeconds => Signature::uniform(
1,
vec![
Expand Down Expand Up @@ -1431,6 +1447,7 @@ fn aliases(func: &BuiltinScalarFunction) -> &'static [&'static str] {
BuiltinScalarFunction::ToTimestampMillis => &["to_timestamp_millis"],
BuiltinScalarFunction::ToTimestampMicros => &["to_timestamp_micros"],
BuiltinScalarFunction::ToTimestampSeconds => &["to_timestamp_seconds"],
BuiltinScalarFunction::ToTimestampNanos => &["to_timestamp_nanos"],
BuiltinScalarFunction::FromUnixtime => &["from_unixtime"],

// hashing functions
Expand Down
6 changes: 6 additions & 0 deletions datafusion/expr/src/expr_fn.rs
Original file line number Diff line number Diff line change
Expand Up @@ -834,6 +834,12 @@ scalar_expr!(
date,
"converts a string to a `Timestamp(Microseconds, None)`"
);
// Declares the `to_timestamp_nanos` expression helper for
// `BuiltinScalarFunction::ToTimestampNanos` through the `scalar_expr!` macro,
// with a single argument named `date` and the description string below.
// NOTE(review): the description mentions only string input and spells the
// unit `Nanoseconds`; the `ToTimestampNanos` signature also lists `Int64` and
// `Timestamp(..)` inputs, and the return type is `Timestamp(Nanosecond, None)`.
// Confirm whether the description (and its siblings) should be reworded.
scalar_expr!(
ToTimestampNanos,
to_timestamp_nanos,
date,
"converts a string to a `Timestamp(Nanoseconds, None)`"
);
scalar_expr!(
ToTimestampSeconds,
to_timestamp_seconds,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1501,7 +1501,7 @@ mod tests {
test_evaluate(expr, lit("foobarbaz"));

// Check non string arguments
// to_timestamp("2020-09-08T12:00:00+00:00") --> timestamp(1599566400000000000i64)
// to_timestamp("2020-09-08T12:00:00+00:00") --> nanosecond timestamp(1599566400000000000i64)
let expr =
call_fn("to_timestamp", vec![lit("2020-09-08T12:00:00+00:00")]).unwrap();
test_evaluate(expr, lit_timestamp_nano(1599566400000000000i64));
Expand Down
11 changes: 10 additions & 1 deletion datafusion/physical-expr/src/datetime_expressions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,15 @@ pub fn to_timestamp_micros(args: &[ColumnarValue]) -> Result<ColumnarValue> {
)
}

/// Implements the `to_timestamp_nanos` SQL function.
///
/// Forwards `args` unchanged to the shared `handle` routine, instantiated with
/// `TimestampNanosecondType` for both of its explicit type parameters, and
/// passes `string_to_timestamp_nanos_shim` as its second argument. The result
/// of `handle` is returned as-is.
pub fn to_timestamp_nanos(args: &[ColumnarValue]) -> Result<ColumnarValue> {
    // Function name handed to `handle`; presumably used when it reports
    // errors (confirm against `handle`).
    let fn_name = "to_timestamp_nanos";

    handle::<TimestampNanosecondType, _, TimestampNanosecondType>(
        args,
        string_to_timestamp_nanos_shim,
        fn_name,
    )
}

/// to_timestamp_seconds SQL function
pub fn to_timestamp_seconds(args: &[ColumnarValue]) -> Result<ColumnarValue> {
handle::<TimestampSecondType, _, TimestampSecondType>(
Expand Down Expand Up @@ -962,7 +971,7 @@ mod tests {
let mut string_builder = StringBuilder::with_capacity(2, 1024);
let mut ts_builder = TimestampNanosecondArray::builder(2);

string_builder.append_value("2020-09-08T13:42:29.190855Z");
string_builder.append_value("2020-09-08T13:42:29.190855");
ts_builder.append_value(1599572549190855000);

string_builder.append_null();
Expand Down
42 changes: 33 additions & 9 deletions datafusion/physical-expr/src/functions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -74,15 +74,20 @@ pub fn create_physical_expr(
// so we don't have to pay a per-array/batch cost.
BuiltinScalarFunction::ToTimestamp => {
Arc::new(match input_phy_exprs[0].data_type(input_schema) {
Ok(DataType::Int64) | Ok(DataType::Timestamp(_, None)) => {
|col_values: &[ColumnarValue]| {
cast_column(
&col_values[0],
&DataType::Timestamp(TimeUnit::Nanosecond, None),
None,
)
}
}
Ok(DataType::Int64) => |col_values: &[ColumnarValue]| {
cast_column(
&col_values[0],
&DataType::Timestamp(TimeUnit::Second, None),
None,
)
},
Ok(DataType::Timestamp(_, None)) => |col_values: &[ColumnarValue]| {
cast_column(
&col_values[0],
&DataType::Timestamp(TimeUnit::Nanosecond, None),
None,
)
},
Ok(DataType::Utf8) => datetime_expressions::to_timestamp,
other => {
return internal_err!(
Expand Down Expand Up @@ -129,6 +134,25 @@ pub fn create_physical_expr(
}
})
}
BuiltinScalarFunction::ToTimestampNanos => {
Arc::new(match input_phy_exprs[0].data_type(input_schema) {
Ok(DataType::Int64) | Ok(DataType::Timestamp(_, None)) => {
|col_values: &[ColumnarValue]| {
cast_column(
&col_values[0],
&DataType::Timestamp(TimeUnit::Nanosecond, None),
None,
)
}
}
Ok(DataType::Utf8) => datetime_expressions::to_timestamp_nanos,
other => {
return internal_err!(
"Unsupported data type {other:?} for function to_timestamp_nanos"
);
}
})
}
BuiltinScalarFunction::ToTimestampSeconds => Arc::new({
match input_phy_exprs[0].data_type(input_schema) {
Ok(DataType::Int64) | Ok(DataType::Timestamp(_, None)) => {
Expand Down
1 change: 1 addition & 0 deletions datafusion/proto/proto/datafusion.proto
Original file line number Diff line number Diff line change
Expand Up @@ -620,6 +620,7 @@ enum ScalarFunction {
ArrayEmpty = 115;
ArrayPopBack = 116;
StringToArray = 117;
ToTimestampNanos = 118;
}

message ScalarFunctionNode {
Expand Down
3 changes: 3 additions & 0 deletions datafusion/proto/src/generated/pbjson.rs

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 3 additions & 0 deletions datafusion/proto/src/generated/prost.rs

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 6 additions & 1 deletion datafusion/proto/src/logical_plan/from_proto.rs
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,8 @@ use datafusion_expr::{
random, regexp_match, regexp_replace, repeat, replace, reverse, right, round, rpad,
rtrim, sha224, sha256, sha384, sha512, signum, sin, sinh, split_part, sqrt,
starts_with, strpos, substr, substring, tan, tanh, to_hex, to_timestamp_micros,
to_timestamp_millis, to_timestamp_seconds, translate, trim, trunc, upper, uuid,
to_timestamp_millis, to_timestamp_nanos, to_timestamp_seconds, translate, trim,
trunc, upper, uuid,
window_frame::regularize,
AggregateFunction, Between, BinaryExpr, BuiltInWindowFunction, BuiltinScalarFunction,
Case, Cast, Expr, GetFieldAccess, GetIndexedField, GroupingSet,
Expand Down Expand Up @@ -521,6 +522,7 @@ impl From<&protobuf::ScalarFunction> for BuiltinScalarFunction {
ScalarFunction::Substr => Self::Substr,
ScalarFunction::ToHex => Self::ToHex,
ScalarFunction::ToTimestampMicros => Self::ToTimestampMicros,
ScalarFunction::ToTimestampNanos => Self::ToTimestampNanos,
ScalarFunction::ToTimestampSeconds => Self::ToTimestampSeconds,
ScalarFunction::Now => Self::Now,
ScalarFunction::CurrentDate => Self::CurrentDate,
Expand Down Expand Up @@ -1592,6 +1594,9 @@ pub fn parse_expr(
ScalarFunction::ToTimestampMicros => {
Ok(to_timestamp_micros(parse_expr(&args[0], registry)?))
}
ScalarFunction::ToTimestampNanos => {
Ok(to_timestamp_nanos(parse_expr(&args[0], registry)?))
}
ScalarFunction::ToTimestampSeconds => {
Ok(to_timestamp_seconds(parse_expr(&args[0], registry)?))
}
Expand Down
1 change: 1 addition & 0 deletions datafusion/proto/src/logical_plan/to_proto.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1522,6 +1522,7 @@ impl TryFrom<&BuiltinScalarFunction> for protobuf::ScalarFunction {
BuiltinScalarFunction::Substr => Self::Substr,
BuiltinScalarFunction::ToHex => Self::ToHex,
BuiltinScalarFunction::ToTimestampMicros => Self::ToTimestampMicros,
BuiltinScalarFunction::ToTimestampNanos => Self::ToTimestampNanos,
BuiltinScalarFunction::ToTimestampSeconds => Self::ToTimestampSeconds,
BuiltinScalarFunction::Now => Self::Now,
BuiltinScalarFunction::CurrentDate => Self::CurrentDate,
Expand Down
29 changes: 23 additions & 6 deletions datafusion/sqllogictest/test_files/timestamps.slt
Original file line number Diff line number Diff line change
Expand Up @@ -217,7 +217,7 @@ SELECT to_timestamp_micros(ts) FROM ts_data_secs LIMIT 3

# to nanos
query P
SELECT to_timestamp(ts) FROM ts_data_secs LIMIT 3
SELECT to_timestamp_nanos(ts) FROM ts_data_secs LIMIT 3
----
2020-09-08T13:42:29
2020-09-08T12:42:29
Expand All @@ -244,7 +244,7 @@ SELECT to_timestamp_seconds(ts) FROM ts_data_micros LIMIT 3
2020-09-08T11:42:29


# Original column is micros, convert to nanos and check timestamp
# Original column is micros, convert to nanos (to_timestamp on a timestamp input) and check timestamp

query P
SELECT to_timestamp(ts) FROM ts_data_micros LIMIT 3
Expand All @@ -266,7 +266,7 @@ SELECT from_unixtime(ts / 1000000000) FROM ts_data LIMIT 3;
# to_timestamp

query I
SELECT COUNT(*) FROM ts_data_nanos where ts > to_timestamp('2020-09-08T12:00:00+00:00')
SELECT COUNT(*) FROM ts_data_nanos where ts > timestamp '2020-09-08T12:00:00+00:00'
----
2

Expand Down Expand Up @@ -375,15 +375,14 @@ set datafusion.optimizer.skip_failed_rules = true
query P
select to_timestamp(a) from (select to_timestamp(1) as a) A;
----
1970-01-01T00:00:00.000000001
1970-01-01T00:00:01

# cast_to_timestamp_seconds_twice
query P
select to_timestamp_seconds(a) from (select to_timestamp_seconds(1) as a)A
----
1970-01-01T00:00:01


# cast_to_timestamp_millis_twice
query P
select to_timestamp_millis(a) from (select to_timestamp_millis(1) as a)A;
Expand All @@ -396,18 +395,30 @@ select to_timestamp_micros(a) from (select to_timestamp_micros(1) as a)A;
----
1970-01-01T00:00:00.000001

# cast_to_timestamp_nanos_twice
query P
select to_timestamp_nanos(a) from (select to_timestamp_nanos(1) as a)A;
----
1970-01-01T00:00:00.000000001

# to_timestamp_i32
query P
select to_timestamp(cast (1 as int));
----
1970-01-01T00:00:00.000000001
1970-01-01T00:00:01

# to_timestamp_micros_i32
query P
select to_timestamp_micros(cast (1 as int));
----
1970-01-01T00:00:00.000001

# to_timestamp_nanos_i32
query P
select to_timestamp_nanos(cast (1 as int));
----
1970-01-01T00:00:00.000000001

# to_timestamp_millis_i32
query P
select to_timestamp_millis(cast (1 as int));
Expand Down Expand Up @@ -1776,3 +1787,9 @@ query B
SELECT TIMESTAMPTZ '2020-01-01 00:00:00Z' = TIMESTAMP '2020-01-01'
----
true

# verify to_timestamp edge cases to be in sync with postgresql
query PPPPP
SELECT to_timestamp(null), to_timestamp(-62125747200), to_timestamp(0), to_timestamp(1926632005177), to_timestamp(1926632005)
----
NULL 0001-04-25T00:00:00 1970-01-01T00:00:00 +63022-07-16T12:59:37 2031-01-19T23:33:25
Loading

0 comments on commit 12b473b

Please sign in to comment.