diff --git a/datafusion/functions/src/unicode/left.rs b/datafusion/functions/src/unicode/left.rs index e583523d84a0..c8fbee4d90d8 100644 --- a/datafusion/functions/src/unicode/left.rs +++ b/datafusion/functions/src/unicode/left.rs @@ -17,7 +17,7 @@ use std::any::Any; use std::cmp::Ordering; -use std::sync::{Arc, OnceLock}; +use std::sync::Arc; use arrow::array::{ Array, ArrayAccessor, ArrayIter, ArrayRef, GenericStringArray, Int64Array, @@ -31,12 +31,28 @@ use datafusion_common::cast::{ }; use datafusion_common::exec_err; use datafusion_common::Result; -use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING; use datafusion_expr::TypeSignature::Exact; use datafusion_expr::{ ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, }; +use datafusion_macros::user_doc; +#[user_doc( + doc_section(label = "String Functions"), + description = "Returns a specified number of characters from the left side of a string.", + syntax_example = "left(str, n)", + sql_example = r#"```sql +> select left('datafusion', 4); ++-----------------------------------+ +| left(Utf8("datafusion"),Int64(4)) | ++-----------------------------------+ +| data | ++-----------------------------------+ +```"#, + standard_argument(name = "str", prefix = "String"), + argument(name = "n", description = "Number of characters to return."), + related_udf(name = "right") +)] #[derive(Debug)] pub struct LeftFunc { signature: Signature, @@ -99,36 +115,10 @@ impl ScalarUDFImpl for LeftFunc { } fn documentation(&self) -> Option<&Documentation> { - Some(get_left_doc()) + self.doc() } } -static DOCUMENTATION: OnceLock = OnceLock::new(); - -fn get_left_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_STRING, - "Returns a specified number of characters from the left side of a string.", - "left(str, n)", - ) - .with_sql_example( - r#"```sql -> select left('datafusion', 4); -+-----------------------------------+ -| left(Utf8("datafusion"),Int64(4)) | -+-----------------------------------+ -| data | -+-----------------------------------+ -```"#, - ) - .with_standard_argument("str", Some("String")) - .with_argument("n", "Number of characters to return.") - .with_related_udf("right") - .build() - }) -} - /// Returns first n characters in the string, or when n is negative, returns all but last |n| characters. /// left('abcde', 2) = 'ab' /// The implementation uses UTF-8 code points as characters diff --git a/datafusion/functions/src/unicode/lpad.rs b/datafusion/functions/src/unicode/lpad.rs index f1750d2277ca..0b41071cad9e 100644 --- a/datafusion/functions/src/unicode/lpad.rs +++ b/datafusion/functions/src/unicode/lpad.rs @@ -17,7 +17,7 @@ use std::any::Any; use std::fmt::Write; -use std::sync::{Arc, OnceLock}; +use std::sync::Arc; use arrow::array::{ Array, ArrayRef, AsArray, GenericStringArray, GenericStringBuilder, Int64Array, @@ -31,12 +31,32 @@ use crate::strings::StringArrayType; use crate::utils::{make_scalar_function, utf8_to_str_type}; use datafusion_common::cast::as_int64_array; use datafusion_common::{exec_err, Result}; -use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING; use datafusion_expr::TypeSignature::Exact; use datafusion_expr::{ ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, }; +use datafusion_macros::user_doc; +#[user_doc( + doc_section(label = "String Functions"), + description = "Pads the left side of a string with another string to a specified string length.", + syntax_example = "lpad(str, n[, padding_str])", + sql_example = r#"```sql +> select lpad('Dolly', 10, 'hello'); ++---------------------------------------------+ +| lpad(Utf8("Dolly"),Int64(10),Utf8("hello")) | ++---------------------------------------------+ +| helloDolly | ++---------------------------------------------+ +```"#, + standard_argument(name = "str", prefix = "String"), + argument(name = "n", description = "String length to pad to."), + argument( + name = "padding_str", + description = "Optional string expression to pad with. Can be a constant, column, or function, and any combination of string operators. _Default is a space._" + ), + related_udf(name = "rpad") +)] #[derive(Debug)] pub struct LPadFunc { signature: Signature, @@ -103,34 +123,10 @@ impl ScalarUDFImpl for LPadFunc { } fn documentation(&self) -> Option<&Documentation> { - Some(get_lpad_doc()) + self.doc() } } -static DOCUMENTATION: OnceLock = OnceLock::new(); - -fn get_lpad_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_STRING, - "Pads the left side of a string with another string to a specified string length.", - "lpad(str, n[, padding_str])") - .with_sql_example(r#"```sql -> select lpad('Dolly', 10, 'hello'); -+---------------------------------------------+ -| lpad(Utf8("Dolly"),Int64(10),Utf8("hello")) | -+---------------------------------------------+ -| helloDolly | -+---------------------------------------------+ -```"#) - .with_standard_argument("str", Some("String")) - .with_argument("n", "String length to pad to.") - .with_argument("padding_str", "Optional string expression to pad with. Can be a constant, column, or function, and any combination of string operators. _Default is a space._") - .with_related_udf("rpad") - .build() - }) -} - /// Extends the string to length 'length' by prepending the characters fill (a space by default). /// If the string is already longer than length then it is truncated (on the right). /// lpad('hi', 5, 'xy') = 'xyxhi' diff --git a/datafusion/functions/src/unicode/right.rs b/datafusion/functions/src/unicode/right.rs index 4e414fbae5cb..3561176f1dd7 100644 --- a/datafusion/functions/src/unicode/right.rs +++ b/datafusion/functions/src/unicode/right.rs @@ -17,7 +17,7 @@ use std::any::Any; use std::cmp::{max, Ordering}; -use std::sync::{Arc, OnceLock}; +use std::sync::Arc; use arrow::array::{ Array, ArrayAccessor, ArrayIter, ArrayRef, GenericStringArray, Int64Array, @@ -31,12 +31,28 @@ use datafusion_common::cast::{ }; use datafusion_common::exec_err; use datafusion_common::Result; -use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING; use datafusion_expr::TypeSignature::Exact; use datafusion_expr::{ ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, }; +use datafusion_macros::user_doc; +#[user_doc( + doc_section(label = "String Functions"), + description = "Returns a specified number of characters from the right side of a string.", + syntax_example = "right(str, n)", + sql_example = r#"```sql +> select right('datafusion', 6); ++------------------------------------+ +| right(Utf8("datafusion"),Int64(6)) | ++------------------------------------+ +| fusion | ++------------------------------------+ +```"#, + standard_argument(name = "str", prefix = "String"), + argument(name = "n", description = "Number of characters to return."), + related_udf(name = "left") +)] #[derive(Debug)] pub struct RightFunc { signature: Signature, @@ -99,36 +115,10 @@ impl ScalarUDFImpl for RightFunc { } fn documentation(&self) -> Option<&Documentation> { - Some(get_right_doc()) + self.doc() } } -static DOCUMENTATION: OnceLock = OnceLock::new(); - -fn get_right_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_STRING, - "Returns a specified number of characters from the right side of a string.", - "right(str, n)", - ) - .with_sql_example( - r#"```sql -> select right('datafusion', 6); -+------------------------------------+ -| right(Utf8("datafusion"),Int64(6)) | -+------------------------------------+ -| fusion | -+------------------------------------+ -```"#, - ) - .with_standard_argument("str", Some("String")) - .with_argument("n", "Number of characters to return") - .with_related_udf("left") - .build() - }) -} - /// Returns last n characters in the string, or when n is negative, returns all but first |n| characters. /// right('abcde', 2) = 'de' /// The implementation uses UTF-8 code points as characters diff --git a/datafusion/functions/src/unicode/rpad.rs b/datafusion/functions/src/unicode/rpad.rs index d5a0079c72aa..890230ac4344 100644 --- a/datafusion/functions/src/unicode/rpad.rs +++ b/datafusion/functions/src/unicode/rpad.rs @@ -25,17 +25,37 @@ use arrow::datatypes::DataType; use datafusion_common::cast::as_int64_array; use datafusion_common::DataFusionError; use datafusion_common::{exec_err, Result}; -use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING; use datafusion_expr::TypeSignature::Exact; use datafusion_expr::{ ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, }; +use datafusion_macros::user_doc; use std::any::Any; use std::fmt::Write; -use std::sync::{Arc, OnceLock}; +use std::sync::Arc; use unicode_segmentation::UnicodeSegmentation; use DataType::{LargeUtf8, Utf8, Utf8View}; +#[user_doc( + doc_section(label = "String Functions"), + description = "Pads the right side of a string with another string to a specified string length.", + syntax_example = "rpad(str, n[, padding_str])", + sql_example = r#"```sql +> select rpad('datafusion', 20, '_-'); ++-----------------------------------------------+ +| rpad(Utf8("datafusion"),Int64(20),Utf8("_-")) | ++-----------------------------------------------+ +| datafusion_-_-_-_-_- | ++-----------------------------------------------+ +```"#, + standard_argument(name = "str", prefix = "String"), + argument(name = "n", description = "String length to pad to."), + argument( + name = "padding_str", + description = "String expression to pad with. Can be a constant, column, or function, and any combination of string operators. _Default is a space._" + ), + related_udf(name = "lpad") +)] #[derive(Debug)] pub struct RPadFunc { signature: Signature, @@ -122,38 +142,10 @@ impl ScalarUDFImpl for RPadFunc { } fn documentation(&self) -> Option<&Documentation> { - Some(get_rpad_doc()) + self.doc() } } -static DOCUMENTATION: OnceLock = OnceLock::new(); - -fn get_rpad_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_STRING, - "Pads the right side of a string with another string to a specified string length.", - "rpad(str, n[, padding_str])") - .with_sql_example(r#"```sql -> select rpad('datafusion', 20, '_-'); -+-----------------------------------------------+ -| rpad(Utf8("datafusion"),Int64(20),Utf8("_-")) | -+-----------------------------------------------+ -| datafusion_-_-_-_-_- | -+-----------------------------------------------+ -```"#) - .with_standard_argument( - "str", - Some("String"), - ) - .with_argument("n", "String length to pad to.") - .with_argument("padding_str", - "String expression to pad with. Can be a constant, column, or function, and any combination of string operators. _Default is a space._") - .with_related_udf("lpad") - .build() - }) -} - pub fn rpad( args: &[ArrayRef], ) -> Result { diff --git a/datafusion/functions/src/unicode/strpos.rs b/datafusion/functions/src/unicode/strpos.rs index 569af87a4b50..b4bfc2d87627 100644 --- a/datafusion/functions/src/unicode/strpos.rs +++ b/datafusion/functions/src/unicode/strpos.rs @@ -16,18 +16,34 @@ // under the License. use std::any::Any; -use std::sync::{Arc, OnceLock}; +use std::sync::Arc; use crate::strings::StringArrayType; use crate::utils::{make_scalar_function, utf8_to_int_type}; use arrow::array::{ArrayRef, ArrowPrimitiveType, AsArray, PrimitiveArray}; use arrow::datatypes::{ArrowNativeType, DataType, Int32Type, Int64Type}; use datafusion_common::{exec_err, Result}; -use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING; use datafusion_expr::{ ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, }; +use datafusion_macros::user_doc; +#[user_doc( + doc_section(label = "String Functions"), + description = "Returns the starting position of a specified substring in a string. Positions begin at 1. If the substring does not exist in the string, the function returns 0.", + syntax_example = "strpos(str, substr)", + alternative_syntax = "position(substr in origstr)", + sql_example = r#"```sql +> select strpos('datafusion', 'fus'); ++----------------------------------------+ +| strpos(Utf8("datafusion"),Utf8("fus")) | ++----------------------------------------+ +| 5 | ++----------------------------------------+ +```"#, + standard_argument(name = "str", prefix = "String"), + argument(name = "substr", description = "Substring expression to search for.") +)] #[derive(Debug)] pub struct StrposFunc { signature: Signature, @@ -79,33 +95,10 @@ impl ScalarUDFImpl for StrposFunc { } fn documentation(&self) -> Option<&Documentation> { - Some(get_strpos_doc()) + self.doc() } } -static DOCUMENTATION: OnceLock = OnceLock::new(); - -fn get_strpos_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_STRING, - "Returns the starting position of a specified substring in a string. Positions begin at 1. If the substring does not exist in the string, the function returns 0.", - "strpos(str, substr)") - .with_sql_example(r#"```sql -> select strpos('datafusion', 'fus'); -+----------------------------------------+ -| strpos(Utf8("datafusion"),Utf8("fus")) | -+----------------------------------------+ -| 5 | -+----------------------------------------+ -```"#) - .with_standard_argument("str", Some("String")) - .with_argument("substr", "Substring expression to search for.") - .with_alternative_syntax("position(substr in origstr)") - .build() - }) -} - fn strpos(args: &[ArrayRef]) -> Result { match (args[0].data_type(), args[1].data_type()) { (DataType::Utf8, DataType::Utf8) => { diff --git a/datafusion/functions/src/unicode/substr.rs b/datafusion/functions/src/unicode/substr.rs index 687f77dbef5b..df6a50ef1775 100644 --- a/datafusion/functions/src/unicode/substr.rs +++ b/datafusion/functions/src/unicode/substr.rs @@ -16,7 +16,7 @@ // under the License. use std::any::Any; -use std::sync::{Arc, OnceLock}; +use std::sync::Arc; use crate::strings::{make_and_append_view, StringArrayType}; use crate::utils::{make_scalar_function, utf8_to_str_type}; @@ -28,11 +28,34 @@ use arrow::datatypes::DataType; use arrow_buffer::{NullBufferBuilder, ScalarBuffer}; use datafusion_common::cast::as_int64_array; use datafusion_common::{exec_err, plan_err, Result}; -use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING; use datafusion_expr::{ ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, }; - +use datafusion_macros::user_doc; + +#[user_doc( + doc_section(label = "String Functions"), + description = "Extracts a substring of a specified number of characters from a specific starting position in a string.", + syntax_example = "substr(str, start_pos[, length])", + alternative_syntax = "substring(str from start_pos for length)", + sql_example = r#"```sql +> select substr('datafusion', 5, 3); ++----------------------------------------------+ +| substr(Utf8("datafusion"),Int64(5),Int64(3)) | ++----------------------------------------------+ +| fus | ++----------------------------------------------+ +```"#, + standard_argument(name = "str", prefix = "String"), + argument( + name = "start_pos", + description = "Character position to start the substring at. The first character in the string has a position of 1." + ), + argument( + name = "length", + description = "Number of characters to extract. If not specified, returns the rest of the string after the start position." + ) +)] #[derive(Debug)] pub struct SubstrFunc { signature: Signature, @@ -154,34 +177,10 @@ impl ScalarUDFImpl for SubstrFunc { } fn documentation(&self) -> Option<&Documentation> { - Some(get_substr_doc()) + self.doc() } } -static DOCUMENTATION: OnceLock = OnceLock::new(); - -fn get_substr_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_STRING, - "Extracts a substring of a specified number of characters from a specific starting position in a string.", - "substr(str, start_pos[, length])") - .with_sql_example(r#"```sql -> select substr('datafusion', 5, 3); -+----------------------------------------------+ -| substr(Utf8("datafusion"),Int64(5),Int64(3)) | -+----------------------------------------------+ -| fus | -+----------------------------------------------+ -```"#) - .with_standard_argument("str", Some("String")) - .with_argument("start_pos", "Character position to start the substring at. The first character in the string has a position of 1.") - .with_argument("length", "Number of characters to extract. If not specified, returns the rest of the string after the start position.") - .with_alternative_syntax("substring(str from start_pos for length)") - .build() - }) -} - /// Extracts the substring of string starting at the start'th character, and extending for count characters if that is specified. (Same as substring(string from start for count).) /// substr('alphabet', 3) = 'phabet' /// substr('alphabet', 3, 2) = 'ph' diff --git a/datafusion/functions/src/unicode/substrindex.rs b/datafusion/functions/src/unicode/substrindex.rs index 61cd989bb964..60ccd2204788 100644 --- a/datafusion/functions/src/unicode/substrindex.rs +++ b/datafusion/functions/src/unicode/substrindex.rs @@ -16,7 +16,7 @@ // under the License. use std::any::Any; -use std::sync::{Arc, OnceLock}; +use std::sync::Arc; use arrow::array::{ ArrayAccessor, ArrayIter, ArrayRef, ArrowPrimitiveType, AsArray, OffsetSizeTrait, @@ -26,12 +26,42 @@ use arrow::datatypes::{DataType, Int32Type, Int64Type}; use crate::utils::{make_scalar_function, utf8_to_str_type}; use datafusion_common::{exec_err, Result}; -use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING; use datafusion_expr::TypeSignature::Exact; use datafusion_expr::{ ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, }; +use datafusion_macros::user_doc; +#[user_doc( + doc_section(label = "String Functions"), + description = r#"Returns the substring from str before count occurrences of the delimiter delim. +If count is positive, everything to the left of the final delimiter (counting from the left) is returned. +If count is negative, everything to the right of the final delimiter (counting from the right) is returned."#, + syntax_example = "substr_index(str, delim, count)", + sql_example = r#"```sql +> select substr_index('www.apache.org', '.', 1); ++---------------------------------------------------------+ +| substr_index(Utf8("www.apache.org"),Utf8("."),Int64(1)) | ++---------------------------------------------------------+ +| www | ++---------------------------------------------------------+ +> select substr_index('www.apache.org', '.', -1); ++----------------------------------------------------------+ +| substr_index(Utf8("www.apache.org"),Utf8("."),Int64(-1)) | ++----------------------------------------------------------+ +| org | ++----------------------------------------------------------+ +```"#, + standard_argument(name = "str", prefix = "String"), + argument( + name = "delim", + description = "The string to find in str to split str." + ), + argument( + name = "count", + description = "The number of times to search for the delimiter. Can be either a positive or negative number." + ) +)] #[derive(Debug)] pub struct SubstrIndexFunc { signature: Signature, @@ -91,41 +121,10 @@ impl ScalarUDFImpl for SubstrIndexFunc { } fn documentation(&self) -> Option<&Documentation> { - Some(get_substr_index_doc()) + self.doc() } } -static DOCUMENTATION: OnceLock = OnceLock::new(); - -fn get_substr_index_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_STRING, - r#"Returns the substring from str before count occurrences of the delimiter delim. -If count is positive, everything to the left of the final delimiter (counting from the left) is returned. -If count is negative, everything to the right of the final delimiter (counting from the right) is returned."#, - "substr_index(str, delim, count)") - .with_sql_example(r#"```sql -> select substr_index('www.apache.org', '.', 1); -+---------------------------------------------------------+ -| substr_index(Utf8("www.apache.org"),Utf8("."),Int64(1)) | -+---------------------------------------------------------+ -| www | -+---------------------------------------------------------+ -> select substr_index('www.apache.org', '.', -1); -+----------------------------------------------------------+ -| substr_index(Utf8("www.apache.org"),Utf8("."),Int64(-1)) | -+----------------------------------------------------------+ -| org | -+----------------------------------------------------------+ -```"#) - .with_standard_argument("str", Some("String")) - .with_argument("delim", "The string to find in str to split str.") - .with_argument("count", "The number of times to search for the delimiter. Can be either a positive or negative number.") - .build() - }) -} - /// Returns the substring from str before count occurrences of the delimiter delim. If count is positive, everything to the left of the final delimiter (counting from the left) is returned. If count is negative, everything to the right of the final delimiter (counting from the right) is returned. /// SUBSTRING_INDEX('www.apache.org', '.', 1) = www /// SUBSTRING_INDEX('www.apache.org', '.', 2) = www.apache diff --git a/datafusion/functions/src/unicode/translate.rs b/datafusion/functions/src/unicode/translate.rs index 9257b0b04e61..47766ded3add 100644 --- a/datafusion/functions/src/unicode/translate.rs +++ b/datafusion/functions/src/unicode/translate.rs @@ -16,7 +16,7 @@ // under the License. use std::any::Any; -use std::sync::{Arc, OnceLock}; +use std::sync::Arc; use arrow::array::{ ArrayAccessor, ArrayIter, ArrayRef, AsArray, GenericStringArray, OffsetSizeTrait, @@ -27,12 +27,31 @@ use unicode_segmentation::UnicodeSegmentation; use crate::utils::{make_scalar_function, utf8_to_str_type}; use datafusion_common::{exec_err, Result}; -use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING; use datafusion_expr::TypeSignature::Exact; use datafusion_expr::{ ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, }; +use datafusion_macros::user_doc; +#[user_doc( + doc_section(label = "String Functions"), + description = "Translates characters in a string to specified translation characters.", + syntax_example = "translate(str, chars, translation)", + sql_example = r#"```sql +> select translate('twice', 'wic', 'her'); ++--------------------------------------------------+ +| translate(Utf8("twice"),Utf8("wic"),Utf8("her")) | ++--------------------------------------------------+ +| there | ++--------------------------------------------------+ +```"#, + standard_argument(name = "str", prefix = "String"), + argument(name = "chars", description = "Characters to translate."), + argument( + name = "translation", + description = "Translation characters. Translation characters replace only characters at the same position in the **chars** string." + ) +)] #[derive(Debug)] pub struct TranslateFunc { signature: Signature, @@ -85,30 +104,10 @@ impl ScalarUDFImpl for TranslateFunc { } fn documentation(&self) -> Option<&Documentation> { - Some(get_translate_doc()) + self.doc() } } -static DOCUMENTATION: OnceLock = OnceLock::new(); - -fn get_translate_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder(DOC_SECTION_STRING,"Translates characters in a string to specified translation characters.","translate(str, chars, translation)") - .with_sql_example(r#"```sql -> select translate('twice', 'wic', 'her'); -+--------------------------------------------------+ -| translate(Utf8("twice"),Utf8("wic"),Utf8("her")) | -+--------------------------------------------------+ -| there | -+--------------------------------------------------+ -```"#) - .with_standard_argument("str", Some("String")) - .with_argument("chars", "Characters to translate.") - .with_argument("translation", "Translation characters. Translation characters replace only characters at the same position in the **chars** string.") - .build() - }) -} - fn invoke_translate(args: &[ArrayRef]) -> Result { match args[0].data_type() { DataType::Utf8View => { diff --git a/docs/source/user-guide/sql/scalar_functions.md b/docs/source/user-guide/sql/scalar_functions.md index f17e7189a948..c4501fff8f78 100644 --- a/docs/source/user-guide/sql/scalar_functions.md +++ b/docs/source/user-guide/sql/scalar_functions.md @@ -1369,7 +1369,7 @@ right(str, n) #### Arguments - **str**: String expression to operate on. Can be a constant, column, or function, and any combination of operators. -- **n**: Number of characters to return +- **n**: Number of characters to return. #### Example