Skip to content

Commit

Permalink
doc-gen: migrate scalar functions (datetime) documentation 1/2 (#13920)
Browse files Browse the repository at this point in the history
* doc-gen: migrate scalar functions (datetime) documentation 1/2

* fix: fix typo and update function docs

---------

Co-authored-by: Cheng-Yuan-Lai <a186235@gmail.com>
  • Loading branch information
Chen-Yuan-Lai and Cheng-Yuan-Lai authored Dec 29, 2024
1 parent fb5378d commit a203c2b
Show file tree
Hide file tree
Showing 7 changed files with 175 additions and 218 deletions.
30 changes: 11 additions & 19 deletions datafusion/functions/src/datetime/current_date.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,13 +22,21 @@ use arrow::datatypes::DataType::Date32;
use chrono::{Datelike, NaiveDate};

use datafusion_common::{internal_err, Result, ScalarValue};
use datafusion_expr::scalar_doc_sections::DOC_SECTION_DATETIME;
use datafusion_expr::simplify::{ExprSimplifyResult, SimplifyInfo};
use datafusion_expr::{
ColumnarValue, Documentation, Expr, ScalarUDFImpl, Signature, Volatility,
};
use std::sync::OnceLock;
use datafusion_macros::user_doc;

#[user_doc(
doc_section(label = "Time and Date Functions"),
description = r#"
Returns the current UTC date.
The `current_date()` return value is determined at query time and will return the same date, no matter when in the query plan the function executes.
"#,
syntax_example = "current_date()"
)]
#[derive(Debug)]
pub struct CurrentDateFunc {
signature: Signature,
Expand Down Expand Up @@ -105,22 +113,6 @@ impl ScalarUDFImpl for CurrentDateFunc {
}

fn documentation(&self) -> Option<&Documentation> {
Some(get_current_date_doc())
self.doc()
}
}

static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();

/// Returns the shared [`Documentation`] for the `current_date()` function.
///
/// The value is built by the closure on the first call and stored in the
/// `DOCUMENTATION` `OnceLock`; later calls hand back the same cached
/// `'static` reference.
fn get_current_date_doc() -> &'static Documentation {
DOCUMENTATION.get_or_init(|| {
// Builder arguments, in order: doc section, description text, syntax example.
Documentation::builder(
DOC_SECTION_DATETIME,
r#"
Returns the current UTC date.
The `current_date()` return value is determined at query time and will return the same date, no matter when in the query plan the function executes.
"#,
"current_date()")
.build()
})
}
30 changes: 11 additions & 19 deletions datafusion/functions/src/datetime/current_time.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,15 +19,23 @@ use arrow::datatypes::DataType;
use arrow::datatypes::DataType::Time64;
use arrow::datatypes::TimeUnit::Nanosecond;
use std::any::Any;
use std::sync::OnceLock;

use datafusion_common::{internal_err, Result, ScalarValue};
use datafusion_expr::scalar_doc_sections::DOC_SECTION_DATETIME;
use datafusion_expr::simplify::{ExprSimplifyResult, SimplifyInfo};
use datafusion_expr::{
ColumnarValue, Documentation, Expr, ScalarUDFImpl, Signature, Volatility,
};
use datafusion_macros::user_doc;

#[user_doc(
doc_section(label = "Time and Date Functions"),
description = r#"
Returns the current UTC time.
The `current_time()` return value is determined at query time and will return the same time, no matter when in the query plan the function executes.
"#,
syntax_example = "current_time()"
)]
#[derive(Debug)]
pub struct CurrentTimeFunc {
signature: Signature,
Expand Down Expand Up @@ -93,22 +101,6 @@ impl ScalarUDFImpl for CurrentTimeFunc {
}

fn documentation(&self) -> Option<&Documentation> {
Some(get_current_time_doc())
self.doc()
}
}

static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();

/// Returns the shared [`Documentation`] for the `current_time()` function.
///
/// The value is built by the closure on the first call and stored in the
/// `DOCUMENTATION` `OnceLock`; later calls hand back the same cached
/// `'static` reference.
fn get_current_time_doc() -> &'static Documentation {
DOCUMENTATION.get_or_init(|| {
// Builder arguments, in order: doc section, description text, syntax example.
Documentation::builder(
DOC_SECTION_DATETIME,
r#"
Returns the current UTC time.
The `current_time()` return value is determined at query time and will return the same time, no matter when in the query plan the function executes.
"#,
"current_time()")
.build()
})
}
118 changes: 57 additions & 61 deletions datafusion/functions/src/datetime/date_bin.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
// under the License.

use std::any::Any;
use std::sync::{Arc, OnceLock};
use std::sync::Arc;

use arrow::array::temporal_conversions::NANOSECONDS;
use arrow::array::types::{
Expand All @@ -37,10 +37,64 @@ use datafusion_expr::TypeSignature::Exact;
use datafusion_expr::{
ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, TIMEZONE_WILDCARD,
};
use datafusion_macros::user_doc;

use chrono::{DateTime, Datelike, Duration, Months, TimeDelta, Utc};
use datafusion_expr::scalar_doc_sections::DOC_SECTION_DATETIME;

#[user_doc(
doc_section(label = "Time and Date Functions"),
description = r#"
Calculates time intervals and returns the start of the interval nearest to the specified timestamp. Use `date_bin` to downsample time series data by grouping rows into time-based "bins" or "windows" and applying an aggregate or selector function to each window.
For example, if you "bin" or "window" data into 15 minute intervals, an input timestamp of `2023-01-01T18:18:18Z` will be updated to the start time of the 15 minute bin it is in: `2023-01-01T18:15:00Z`.
"#,
syntax_example = "date_bin(interval, expression, origin-timestamp)",
sql_example = r#"```sql
-- Bin the timestamp into 1 day intervals
> SELECT date_bin(interval '1 day', time) as bin
FROM VALUES ('2023-01-01T18:18:18Z'), ('2023-01-03T19:00:03Z') t(time);
+---------------------+
| bin |
+---------------------+
| 2023-01-01T00:00:00 |
| 2023-01-03T00:00:00 |
+---------------------+
2 row(s) fetched.
-- Bin the timestamp into 1 day intervals starting at 3AM on 2023-01-01
> SELECT date_bin(interval '1 day', time, '2023-01-01T03:00:00') as bin
FROM VALUES ('2023-01-01T18:18:18Z'), ('2023-01-03T19:00:03Z') t(time);
+---------------------+
| bin |
+---------------------+
| 2023-01-01T03:00:00 |
| 2023-01-03T03:00:00 |
+---------------------+
2 row(s) fetched.
```"#,
argument(name = "interval", description = "Bin interval."),
argument(
name = "expression",
description = "Time expression to operate on. Can be a constant, column, or function."
),
argument(
name = "origin-timestamp",
description = r#"Optional. Starting point used to determine bin boundaries. If not specified defaults 1970-01-01T00:00:00Z (the UNIX epoch in UTC). The following intervals are supported:
- nanoseconds
- microseconds
- milliseconds
- seconds
- minutes
- hours
- days
- weeks
- months
- years
- century
"#
)
)]
#[derive(Debug)]
pub struct DateBinFunc {
signature: Signature,
Expand Down Expand Up @@ -169,68 +223,10 @@ impl ScalarUDFImpl for DateBinFunc {
}
}
fn documentation(&self) -> Option<&Documentation> {
Some(get_date_bin_doc())
self.doc()
}
}

static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();

/// Returns the shared [`Documentation`] for the `date_bin` function.
///
/// The value is built by the closure on the first call and stored in the
/// `DOCUMENTATION` `OnceLock`; later calls hand back the same cached
/// `'static` reference. The documentation consists of a description, a syntax
/// example, one SQL example block, and three arguments (`interval`,
/// `expression`, `origin-timestamp`).
fn get_date_bin_doc() -> &'static Documentation {
DOCUMENTATION.get_or_init(|| {
// Builder arguments, in order: doc section, description text, syntax example.
Documentation::builder(
DOC_SECTION_DATETIME,
r#"
Calculates time intervals and returns the start of the interval nearest to the specified timestamp. Use `date_bin` to downsample time series data by grouping rows into time-based "bins" or "windows" and applying an aggregate or selector function to each window.
For example, if you "bin" or "window" data into 15 minute intervals, an input timestamp of `2023-01-01T18:18:18Z` will be updated to the start time of the 15 minute bin it is in: `2023-01-01T18:15:00Z`.
"#,
"date_bin(interval, expression, origin-timestamp)")
// Two worked queries: one with the default origin, one with an explicit 3AM origin.
.with_sql_example(r#"```sql
-- Bin the timestamp into 1 day intervals
> SELECT date_bin(interval '1 day', time) as bin
FROM VALUES ('2023-01-01T18:18:18Z'), ('2023-01-03T19:00:03Z') t(time);
+---------------------+
| bin |
+---------------------+
| 2023-01-01T00:00:00 |
| 2023-01-03T00:00:00 |
+---------------------+
2 row(s) fetched.
-- Bin the timestamp into 1 day intervals starting at 3AM on 2023-01-01
> SELECT date_bin(interval '1 day', time, '2023-01-01T03:00:00') as bin
FROM VALUES ('2023-01-01T18:18:18Z'), ('2023-01-03T19:00:03Z') t(time);
+---------------------+
| bin |
+---------------------+
| 2023-01-01T03:00:00 |
| 2023-01-03T03:00:00 |
+---------------------+
2 row(s) fetched.
```
"#)
// Arguments are registered in the same order as they appear in the syntax example.
.with_argument("interval", "Bin interval.")
.with_argument("expression", "Time expression to operate on. Can be a constant, column, or function.")
// Plain (non-raw) string literal spanning several lines; the embedded newlines
// are part of the text and produce the bullet list of supported intervals.
.with_argument("origin-timestamp", "Optional. Starting point used to determine bin boundaries. If not specified defaults 1970-01-01T00:00:00Z (the UNIX epoch in UTC).
The following intervals are supported:
- nanoseconds
- microseconds
- milliseconds
- seconds
- minutes
- hours
- days
- weeks
- months
- years
- century
")
.build()
})
}

enum Interval {
Nanoseconds(i64),
Months(i64),
Expand Down
78 changes: 36 additions & 42 deletions datafusion/functions/src/datetime/date_part.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

use std::any::Any;
use std::str::FromStr;
use std::sync::{Arc, OnceLock};
use std::sync::Arc;

use arrow::array::{Array, ArrayRef, Float64Array, Int32Array};
use arrow::compute::kernels::cast_utils::IntervalUnit;
Expand All @@ -41,11 +41,42 @@ use datafusion_common::{
ExprSchema, Result, ScalarValue,
};
use datafusion_expr::{
scalar_doc_sections::DOC_SECTION_DATETIME, ColumnarValue, Documentation, Expr,
ScalarUDFImpl, Signature, TypeSignature, Volatility,
ColumnarValue, Documentation, Expr, ScalarUDFImpl, Signature, TypeSignature,
Volatility,
};
use datafusion_expr_common::signature::TypeSignatureClass;

use datafusion_macros::user_doc;

#[user_doc(
doc_section(label = "Time and Date Functions"),
description = "Returns the specified part of the date as an integer.",
syntax_example = "date_part(part, expression)",
alternative_syntax = "extract(field FROM source)",
argument(
name = "part",
description = r#"Part of the date to return. The following date parts are supported:
- year
- quarter (emits value in inclusive range [1, 4] based on which quartile of the year the date is in)
- month
- week (week of the year)
- day (day of the month)
- hour
- minute
- second
- millisecond
- microsecond
- nanosecond
- dow (day of the week)
- doy (day of the year)
- epoch (seconds since Unix epoch)
"#
),
argument(
name = "expression",
description = "Time expression to operate on. Can be a constant, column, or function."
)
)]
#[derive(Debug)]
pub struct DatePartFunc {
signature: Signature,
Expand Down Expand Up @@ -190,7 +221,7 @@ impl ScalarUDFImpl for DatePartFunc {
&self.aliases
}
fn documentation(&self) -> Option<&Documentation> {
Some(get_date_part_doc())
self.doc()
}
}

Expand All @@ -206,43 +237,6 @@ fn part_normalization(part: &str) -> &str {
.unwrap_or(part)
}

static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();

/// Returns the shared [`Documentation`] for the `date_part` function.
///
/// The value is built by the closure on the first call and stored in the
/// `DOCUMENTATION` `OnceLock`; later calls hand back the same cached
/// `'static` reference. The documentation consists of a description, a syntax
/// example, two arguments (`part`, `expression`) and an alternative syntax.
fn get_date_part_doc() -> &'static Documentation {
DOCUMENTATION.get_or_init(|| {
// Builder arguments, in order: doc section, description text, syntax example.
Documentation::builder(
DOC_SECTION_DATETIME,
"Returns the specified part of the date as an integer.",
"date_part(part, expression)")
// The raw string keeps the bullet list of supported parts verbatim, newlines included.
.with_argument(
"part",
r#"Part of the date to return. The following date parts are supported:
- year
- quarter (emits value in inclusive range [1, 4] based on which quartile of the year the date is in)
- month
- week (week of the year)
- day (day of the month)
- hour
- minute
- second
- millisecond
- microsecond
- nanosecond
- dow (day of the week)
- doy (day of the year)
- epoch (seconds since Unix epoch)
"#,
)
.with_argument(
"expression",
"Time expression to operate on. Can be a constant, column, or function.",
)
// Second spelling of the same call, recorded alongside the primary syntax example.
.with_alternative_syntax("extract(field FROM source)")
.build()
})
}

/// Invoke [`date_part`] on an `array` (e.g. Timestamp) and convert the
/// result to a total number of seconds, milliseconds, microseconds or
/// nanoseconds
Expand Down
Loading

0 comments on commit a203c2b

Please sign in to comment.