From 0ec7858cfdcfca003d5496eb09717dffc818a3c2 Mon Sep 17 00:00:00 2001 From: Bruce Ritchie Date: Tue, 23 Jan 2024 23:32:46 -0500 Subject: [PATCH] Updated documentation concerning to_timestamp scalar function including adding missing link in the datafusion-examples/README file. Resolves #8980 --- datafusion-examples/README.md | 1 + ...aframe_to_timestamp.rs => to_timestamp.rs} | 37 ++++++- .../source/user-guide/sql/scalar_functions.md | 98 ++++++++++++++++++- 3 files changed, 131 insertions(+), 5 deletions(-) rename datafusion-examples/examples/{dataframe_to_timestamp.rs => to_timestamp.rs} (70%) diff --git a/datafusion-examples/README.md b/datafusion-examples/README.md index eecb63d3be65..298ee9364efe 100644 --- a/datafusion-examples/README.md +++ b/datafusion-examples/README.md @@ -64,6 +64,7 @@ cargo run --example csv_sql - [`simple_udaf.rs`](examples/simple_udaf.rs): Define and invoke a User Defined Aggregate Function (UDAF) - [`advanced_udaf.rs`](examples/advanced_udaf.rs): Define and invoke a more complicated User Defined Aggregate Function (UDAF) - [`simple_udfw.rs`](examples/simple_udwf.rs): Define and invoke a User Defined Window Function (UDWF) +- [`to_timestamp.rs`](examples/to_timestamp.rs): Examples of using the to_timestamp functions - [`advanced_udwf.rs`](examples/advanced_udwf.rs): Define and invoke a more complicated User Defined Window Function (UDWF) ## Distributed diff --git a/datafusion-examples/examples/dataframe_to_timestamp.rs b/datafusion-examples/examples/to_timestamp.rs similarity index 70% rename from datafusion-examples/examples/dataframe_to_timestamp.rs rename to datafusion-examples/examples/to_timestamp.rs index 8caa9245596b..a07dbaefb75b 100644 --- a/datafusion-examples/examples/dataframe_to_timestamp.rs +++ b/datafusion-examples/examples/to_timestamp.rs @@ -24,7 +24,8 @@ use datafusion::error::Result; use datafusion::prelude::*; use datafusion_common::assert_contains; -/// This example demonstrates how to use the to_timestamp 
function in the DataFrame API as well as via sql. +/// This example demonstrates how to use the to_timestamp series +/// of functions in the DataFrame API as well as via sql. #[tokio::main] async fn main() -> Result<()> { // define a schema. @@ -61,7 +62,8 @@ async fn main() -> Result<()> { // use to_timestamp function to convert col 'a' to timestamp type using the default parsing let df = df.with_column("a", to_timestamp(vec![col("a")]))?; - // use to_timestamp_seconds function to convert col 'b' to timestamp(Seconds) type using a list of chrono formats to try + // use to_timestamp_seconds function to convert col 'b' to timestamp(Seconds) type using a list + // of chrono formats (https://docs.rs/chrono/latest/chrono/format/strftime/index.html) to try let df = df.with_column( "b", to_timestamp_seconds(vec![ @@ -90,14 +92,31 @@ async fn main() -> Result<()> { df.show().await?; // use sql to convert a static string to a timestamp using a list of chrono formats to try - let df = ctx.sql("select to_timestamp('01-14-2023 01:01:30+05:30', '%+', '%d-%m-%Y %H/%M/%S', '%m-%d-%Y %H:%M:%S%#z')").await?; + // note that one of the formats is invalid ('%q') but since DataFusion will try all the + // formats until it encounters one that parses the timestamp expression successfully + // no error will be returned + let df = ctx.sql("select to_timestamp_micros('01-14-2023 01:01:30+05:30', '%q', '%d-%m-%Y %H/%M/%S', '%+', '%m-%d-%Y %H:%M:%S%#z')").await?; + + // print the results + df.show().await?; + + // casting a string to TIMESTAMP will also work for RFC3339 timestamps + let df = ctx + .sql("select to_timestamp_millis(TIMESTAMP '2022-08-03T14:38:50Z')") + .await?; + + // print the results + df.show().await?; + + // unix timestamps (in seconds) are also supported + let df = ctx.sql("select to_timestamp(1926632005)").await?; // print the results df.show().await?; // use sql to convert a static string to a timestamp using a non-matching chrono format to try let result = ctx - 
.sql("select to_timestamp('01-14-2023 01/01/30', '%d-%m-%Y %H:%M:%S')") + .sql("select to_timestamp_nanos('01-14-2023 01/01/30', '%d-%m-%Y %H:%M:%S')") .await? .collect() .await; @@ -105,5 +124,15 @@ async fn main() -> Result<()> { let expected = "Error parsing timestamp from '01-14-2023 01/01/30' using format '%d-%m-%Y %H:%M:%S': input contains invalid characters"; assert_contains!(result.unwrap_err().to_string(), expected); + // note that using arrays for the chrono formats is not supported + let result = ctx + .sql("SELECT to_timestamp('2022-08-03T14:38:50+05:30', make_array('%s', '%q', '%d-%m-%Y %H:%M:%S%#z', '%+'))") + .await? + .collect() + .await; + + let expected = "to_timestamp function unsupported data type at index 1: List"; + assert_contains!(result.unwrap_err().to_string(), expected); + Ok(()) } diff --git a/docs/source/user-guide/sql/scalar_functions.md b/docs/source/user-guide/sql/scalar_functions.md index c3def3f89b5b..9da260bf52f7 100644 --- a/docs/source/user-guide/sql/scalar_functions.md +++ b/docs/source/user-guide/sql/scalar_functions.md @@ -1311,8 +1311,8 @@ regexp_replace(str, regexp, replacement, flags) - [date_part](#date_part) - [datepart](#datepart) - [extract](#extract) -- [to_timestamp](#to_timestamp) - [today](#today) +- [to_timestamp](#to_timestamp) - [to_timestamp_millis](#to_timestamp_millis) - [to_timestamp_micros](#to_timestamp_micros) - [to_timestamp_seconds](#to_timestamp_seconds) @@ -1526,6 +1526,26 @@ to_timestamp(expression[, ..., format_n]) [chrono format]: https://docs.rs/chrono/latest/chrono/format/strftime/index.html +#### Example + +``` +❯ select to_timestamp('2023-01-31T09:26:56.123456789-05:00'); ++-----------------------------------------------------------+ +| to_timestamp(Utf8("2023-01-31T09:26:56.123456789-05:00")) | ++-----------------------------------------------------------+ +| 2023-01-31T14:26:56.123456789 | ++-----------------------------------------------------------+ +❯ select 
to_timestamp('03:59:00.123456789 05-17-2023', '%c', '%+', '%H:%M:%S%.f %m-%d-%Y'); ++--------------------------------------------------------------------------------------------------------+ +| to_timestamp(Utf8("03:59:00.123456789 05-17-2023"),Utf8("%c"),Utf8("%+"),Utf8("%H:%M:%S%.f %m-%d-%Y")) | ++--------------------------------------------------------------------------------------------------------+ +| 2023-05-17T03:59:00.123456789 | ++--------------------------------------------------------------------------------------------------------+ +``` +Additional examples can be found [here] + +[here]: https://github.com/apache/arrow-datafusion/blob/main/datafusion-examples/examples/to_timestamp.rs + ### `to_timestamp_millis` Converts a value to a timestamp (`YYYY-MM-DDT00:00:00.000Z`). @@ -1546,6 +1566,25 @@ to_timestamp_millis(expression[, ..., format_n]) they appear with the first successful one being returned. If none of the formats successfully parse the expression an error will be returned. 
+#### Example +``` +❯ select to_timestamp_millis('2023-01-31T09:26:56.123456789-05:00'); ++------------------------------------------------------------------+ +| to_timestamp_millis(Utf8("2023-01-31T09:26:56.123456789-05:00")) | ++------------------------------------------------------------------+ +| 2023-01-31T14:26:56.123 | ++------------------------------------------------------------------+ +❯ select to_timestamp_millis('03:59:00.123456789 05-17-2023', '%c', '%+', '%H:%M:%S%.f %m-%d-%Y'); ++---------------------------------------------------------------------------------------------------------------+ +| to_timestamp_millis(Utf8("03:59:00.123456789 05-17-2023"),Utf8("%c"),Utf8("%+"),Utf8("%H:%M:%S%.f %m-%d-%Y")) | ++---------------------------------------------------------------------------------------------------------------+ +| 2023-05-17T03:59:00.123 | ++---------------------------------------------------------------------------------------------------------------+ +``` +Additional examples can be found [here] + +[here]: https://github.com/apache/arrow-datafusion/blob/main/datafusion-examples/examples/to_timestamp.rs + ### `to_timestamp_micros` Converts a value to a timestamp (`YYYY-MM-DDT00:00:00.000000Z`). @@ -1566,6 +1605,25 @@ to_timestamp_micros(expression[, ..., format_n]) they appear with the first successful one being returned. If none of the formats successfully parse the expression an error will be returned. 
+#### Example +``` +❯ select to_timestamp_micros('2023-01-31T09:26:56.123456789-05:00'); ++------------------------------------------------------------------+ +| to_timestamp_micros(Utf8("2023-01-31T09:26:56.123456789-05:00")) | ++------------------------------------------------------------------+ +| 2023-01-31T14:26:56.123456 | ++------------------------------------------------------------------+ +❯ select to_timestamp_micros('03:59:00.123456789 05-17-2023', '%c', '%+', '%H:%M:%S%.f %m-%d-%Y'); ++---------------------------------------------------------------------------------------------------------------+ +| to_timestamp_micros(Utf8("03:59:00.123456789 05-17-2023"),Utf8("%c"),Utf8("%+"),Utf8("%H:%M:%S%.f %m-%d-%Y")) | ++---------------------------------------------------------------------------------------------------------------+ +| 2023-05-17T03:59:00.123456 | ++---------------------------------------------------------------------------------------------------------------+ +``` +Additional examples can be found [here] + +[here]: https://github.com/apache/arrow-datafusion/blob/main/datafusion-examples/examples/to_timestamp.rs + ### `to_timestamp_nanos` Converts a value to a timestamp (`YYYY-MM-DDT00:00:00.000000000Z`). @@ -1586,6 +1644,25 @@ to_timestamp_nanos(expression[, ..., format_n]) they appear with the first successful one being returned. If none of the formats successfully parse the expression an error will be returned. 
+#### Example +``` +❯ select to_timestamp_nanos('2023-01-31T09:26:56.123456789-05:00'); ++-----------------------------------------------------------------+ +| to_timestamp_nanos(Utf8("2023-01-31T09:26:56.123456789-05:00")) | ++-----------------------------------------------------------------+ +| 2023-01-31T14:26:56.123456789 | ++-----------------------------------------------------------------+ +❯ select to_timestamp_nanos('03:59:00.123456789 05-17-2023', '%c', '%+', '%H:%M:%S%.f %m-%d-%Y'); ++--------------------------------------------------------------------------------------------------------------+ +| to_timestamp_nanos(Utf8("03:59:00.123456789 05-17-2023"),Utf8("%c"),Utf8("%+"),Utf8("%H:%M:%S%.f %m-%d-%Y")) | ++--------------------------------------------------------------------------------------------------------------+ +| 2023-05-17T03:59:00.123456789 | ++--------------------------------------------------------------------------------------------------------------+ +``` +Additional examples can be found [here] + +[here]: https://github.com/apache/arrow-datafusion/blob/main/datafusion-examples/examples/to_timestamp.rs + ### `to_timestamp_seconds` Converts a value to a timestamp (`YYYY-MM-DDT00:00:00.000Z`). @@ -1606,6 +1683,25 @@ to_timestamp_seconds(expression[, ..., format_n]) they appear with the first successful one being returned. If none of the formats successfully parse the expression an error will be returned. 
+#### Example +``` +❯ select to_timestamp_seconds('2023-01-31T09:26:56.123456789-05:00'); ++-------------------------------------------------------------------+ +| to_timestamp_seconds(Utf8("2023-01-31T09:26:56.123456789-05:00")) | ++-------------------------------------------------------------------+ +| 2023-01-31T14:26:56 | ++-------------------------------------------------------------------+ +❯ select to_timestamp_seconds('03:59:00.123456789 05-17-2023', '%c', '%+', '%H:%M:%S%.f %m-%d-%Y'); ++----------------------------------------------------------------------------------------------------------------+ +| to_timestamp_seconds(Utf8("03:59:00.123456789 05-17-2023"),Utf8("%c"),Utf8("%+"),Utf8("%H:%M:%S%.f %m-%d-%Y")) | ++----------------------------------------------------------------------------------------------------------------+ +| 2023-05-17T03:59:00 | ++----------------------------------------------------------------------------------------------------------------+ +``` +Additional examples can be found [here] + +[here]: https://github.com/apache/arrow-datafusion/blob/main/datafusion-examples/examples/to_timestamp.rs + ### `from_unixtime` Converts an integer to RFC3339 timestamp format (`YYYY-MM-DDT00:00:00.000000000Z`).