From 383f279982777e5a4306f0072403bb822158b3fc Mon Sep 17 00:00:00 2001 From: Ian Lai <108986288+Chen-Yuan-Lai@users.noreply.github.com> Date: Mon, 30 Dec 2024 03:25:57 +0800 Subject: [PATCH] doc-gen: migrate scalar functions (array) documentation 2/3 (#13929) * doc-gen: migrate scalar functions (array) documentation 2/3 * fix: import doc and macro, fix typo and update function docs --------- Co-authored-by: Cheng-Yuan-Lai --- datafusion/functions-nested/src/flatten.rs | 49 +++---- datafusion/functions-nested/src/length.rs | 55 +++----- datafusion/functions-nested/src/make_array.rs | 53 +++---- datafusion/functions-nested/src/map.rs | 105 +++++++------- .../functions-nested/src/map_extract.rs | 67 ++++----- datafusion/functions-nested/src/map_keys.rs | 51 +++---- datafusion/functions-nested/src/map_values.rs | 52 +++---- datafusion/functions-nested/src/position.rs | 129 ++++++++---------- .../source/user-guide/sql/scalar_functions.md | 24 ++-- 9 files changed, 243 insertions(+), 342 deletions(-) diff --git a/datafusion/functions-nested/src/flatten.rs b/datafusion/functions-nested/src/flatten.rs index 7cb52ae4c5c9..30bf2fcbf624 100644 --- a/datafusion/functions-nested/src/flatten.rs +++ b/datafusion/functions-nested/src/flatten.rs @@ -26,13 +26,13 @@ use datafusion_common::cast::{ as_generic_list_array, as_large_list_array, as_list_array, }; use datafusion_common::{exec_err, Result}; -use datafusion_expr::scalar_doc_sections::DOC_SECTION_ARRAY; use datafusion_expr::{ ArrayFunctionSignature, ColumnarValue, Documentation, ScalarUDFImpl, Signature, TypeSignature, Volatility, }; +use datafusion_macros::user_doc; use std::any::Any; -use std::sync::{Arc, OnceLock}; +use std::sync::Arc; make_udf_expr_and_func!( Flatten, @@ -42,6 +42,23 @@ make_udf_expr_and_func!( flatten_udf ); +#[user_doc( + doc_section(label = "Array Functions"), + description = "Converts an array of arrays to a flat array.\n\n- Applies to any depth of nested arrays\n- Does not change arrays that are already flat\n\nThe flattened array contains all the elements from all source arrays.", + syntax_example = "flatten(array)", + sql_example = r#"```sql +> select flatten([[1, 2], [3, 4]]); ++------------------------------+ +| flatten(List([1,2], [3,4])) | ++------------------------------+ +| [1, 2, 3, 4] | ++------------------------------+ +```"#, + argument( + name = "array", + description = "Array expression. Can be a constant, column, or function, and any combination of array operators." + ) +)] #[derive(Debug)] pub struct Flatten { signature: Signature, @@ -118,35 +135,9 @@ impl ScalarUDFImpl for Flatten { } fn documentation(&self) -> Option<&Documentation> { - Some(get_flatten_doc()) + self.doc() } } -static DOCUMENTATION: OnceLock = OnceLock::new(); - -fn get_flatten_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_ARRAY, - "Converts an array of arrays to a flat array.\n\n- Applies to any depth of nested arrays\n- Does not change arrays that are already flat\n\nThe flattened array contains all the elements from all source arrays.", - - "flatten(array)") - .with_sql_example( - r#"```sql -> select flatten([[1, 2], [3, 4]]); -+------------------------------+ -| flatten(List([1,2], [3,4])) | -+------------------------------+ -| [1, 2, 3, 4] | -+------------------------------+ -```"#, - ) - .with_argument( - "array", - "Array expression. Can be a constant, column, or function, and any combination of array operators.", - ) - .build() - }) -} /// Flatten SQL function pub fn flatten_inner(args: &[ArrayRef]) -> Result { diff --git a/datafusion/functions-nested/src/length.rs b/datafusion/functions-nested/src/length.rs index 2f03842cbeeb..70a9188a2c3d 100644 --- a/datafusion/functions-nested/src/length.rs +++ b/datafusion/functions-nested/src/length.rs @@ -25,13 +25,13 @@ use arrow_schema::DataType; use arrow_schema::DataType::{FixedSizeList, LargeList, List, UInt64}; use datafusion_common::cast::{as_generic_list_array, as_int64_array}; use datafusion_common::{exec_err, internal_datafusion_err, plan_err, Result}; -use datafusion_expr::scalar_doc_sections::DOC_SECTION_ARRAY; use datafusion_expr::{ ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, }; use datafusion_functions::{downcast_arg, downcast_named_arg}; +use datafusion_macros::user_doc; use std::any::Any; -use std::sync::{Arc, OnceLock}; +use std::sync::Arc; make_udf_expr_and_func!( ArrayLength, @@ -41,6 +41,24 @@ make_udf_expr_and_func!( array_length_udf ); +#[user_doc( + doc_section(label = "Array Functions"), + description = "Returns the length of the array dimension.", + syntax_example = "array_length(array, dimension)", + sql_example = r#"```sql +> select array_length([1, 2, 3, 4, 5], 1); ++-------------------------------------------+ +| array_length(List([1,2,3,4,5]), 1) | ++-------------------------------------------+ +| 5 | ++-------------------------------------------+ +```"#, + argument( + name = "array", + description = "Array expression. Can be a constant, column, or function, and any combination of array operators." + ), + argument(name = "dimension", description = "Array dimension.") +)] #[derive(Debug)] pub struct ArrayLength { signature: Signature, @@ -96,41 +114,10 @@ impl ScalarUDFImpl for ArrayLength { } fn documentation(&self) -> Option<&Documentation> { - Some(get_array_length_doc()) + self.doc() } } -static DOCUMENTATION: OnceLock = OnceLock::new(); - -fn get_array_length_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_ARRAY, - "Returns the length of the array dimension.", - - "array_length(array, dimension)") - .with_sql_example( - r#"```sql -> select array_length([1, 2, 3, 4, 5], 1); -+-------------------------------------------+ -| array_length(List([1,2,3,4,5]), 1) | -+-------------------------------------------+ -| 5 | -+-------------------------------------------+ -```"#, - ) - .with_argument( - "array", - "Array expression. Can be a constant, column, or function, and any combination of array operators.", - ) - .with_argument( - "dimension", - "Array dimension.", - ) - .build() - }) -} - /// Array_length SQL function pub fn array_length_inner(args: &[ArrayRef]) -> Result { if args.len() != 1 && args.len() != 2 { diff --git a/datafusion/functions-nested/src/make_array.rs b/datafusion/functions-nested/src/make_array.rs index efedd897de87..0283cdd40275 100644 --- a/datafusion/functions-nested/src/make_array.rs +++ b/datafusion/functions-nested/src/make_array.rs @@ -18,9 +18,10 @@ //! [`ScalarUDFImpl`] definitions for `make_array` function. use std::any::Any; -use std::sync::{Arc, OnceLock}; +use std::sync::Arc; use std::vec; +use crate::utils::make_scalar_function; use arrow::array::{ArrayData, Capacities, MutableArrayData}; use arrow_array::{ new_null_array, Array, ArrayRef, GenericListArray, NullArray, OffsetSizeTrait, @@ -33,13 +34,11 @@ use datafusion_common::{plan_err, Result}; use datafusion_expr::binary::{ try_type_union_resolution_with_struct, type_union_resolution, }; -use datafusion_expr::scalar_doc_sections::DOC_SECTION_ARRAY; use datafusion_expr::TypeSignature; use datafusion_expr::{ ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, }; - -use crate::utils::make_scalar_function; +use datafusion_macros::user_doc; make_udf_expr_and_func!( MakeArray, @@ -48,6 +47,23 @@ make_udf_expr_and_func!( make_array_udf ); +#[user_doc( + doc_section(label = "Array Functions"), + description = "Returns an array using the specified input expressions.", + syntax_example = "make_array(expression1[, ..., expression_n])", + sql_example = r#"```sql +> select make_array(1, 2, 3, 4, 5); ++----------------------------------------------------------+ +| make_array(Int64(1),Int64(2),Int64(3),Int64(4),Int64(5)) | ++----------------------------------------------------------+ +| [1, 2, 3, 4, 5] | ++----------------------------------------------------------+ +```"#, + argument( + name = "expression_n", + description = "Expression to include in the output array. Can be a constant, column, or function, and any combination of arithmetic or string operators." + ) +)] #[derive(Debug)] pub struct MakeArray { signature: Signature, @@ -139,37 +155,10 @@ impl ScalarUDFImpl for MakeArray { } fn documentation(&self) -> Option<&Documentation> { - Some(get_make_array_doc()) + self.doc() } } -static DOCUMENTATION: OnceLock = OnceLock::new(); - -fn get_make_array_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_ARRAY, - "Returns an array using the specified input expressions.", - - "make_array(expression1[, ..., expression_n])") - .with_sql_example( - r#"```sql -> select make_array(1, 2, 3, 4, 5); -+----------------------------------------------------------+ -| make_array(Int64(1),Int64(2),Int64(3),Int64(4),Int64(5)) | -+----------------------------------------------------------+ -| [1, 2, 3, 4, 5] | -+----------------------------------------------------------+ -```"#, - ) - .with_argument( - "expression_n", - "Expression to include in the output array. Can be a constant, column, or function, and any combination of arithmetic or string operators.", - ) - .build() - }) -} - // Empty array is a special case that is useful for many other array functions pub(super) fn empty_array_type() -> DataType { List(Arc::new(Field::new_list_field(DataType::Int64, true))) diff --git a/datafusion/functions-nested/src/map.rs b/datafusion/functions-nested/src/map.rs index d21a19c9fb33..0b098a30b758 100644 --- a/datafusion/functions-nested/src/map.rs +++ b/datafusion/functions-nested/src/map.rs @@ -17,7 +17,7 @@ use std::any::Any; use std::collections::VecDeque; -use std::sync::{Arc, OnceLock}; +use std::sync::Arc; use arrow::array::ArrayData; use arrow_array::{Array, ArrayRef, MapArray, OffsetSizeTrait, StructArray}; @@ -27,10 +27,10 @@ use arrow_schema::{DataType, Field, SchemaBuilder}; use datafusion_common::utils::{fixed_size_list_to_arrays, list_to_arrays}; use datafusion_common::{exec_err, HashSet, Result, ScalarValue}; use datafusion_expr::expr::ScalarFunction; -use datafusion_expr::scalar_doc_sections::DOC_SECTION_MAP; use datafusion_expr::{ ColumnarValue, Documentation, Expr, ScalarUDFImpl, Signature, Volatility, }; +use datafusion_macros::user_doc; use crate::make_array::make_array; @@ -181,6 +181,50 @@ fn make_map_batch_internal( }) } +#[user_doc( + doc_section(label = "Map Functions"), + description = "Returns an Arrow map with the specified key-value pairs.\n\n\ + The `make_map` function creates a map from two lists: one for keys and one for values. Each key must be unique and non-null.", + syntax_example = "map(key, value)\nmap(key: value)\nmake_map(['key1', 'key2'], ['value1', 'value2'])", + sql_example = r#" +```sql +-- Using map function +SELECT MAP('type', 'test'); +---- +{type: test} + +SELECT MAP(['POST', 'HEAD', 'PATCH'], [41, 33, null]); +---- +{POST: 41, HEAD: 33, PATCH: } + +SELECT MAP([[1,2], [3,4]], ['a', 'b']); +---- +{[1, 2]: a, [3, 4]: b} + +SELECT MAP { 'a': 1, 'b': 2 }; +---- +{a: 1, b: 2} + +-- Using make_map function +SELECT MAKE_MAP(['POST', 'HEAD'], [41, 33]); +---- +{POST: 41, HEAD: 33} + +SELECT MAKE_MAP(['key1', 'key2'], ['value1', null]); +---- +{key1: value1, key2: } +```"#, + argument( + name = "key", + description = "For `map`: Expression to be used for key. Can be a constant, column, function, or any combination of arithmetic or string operators.\n\ + For `make_map`: The list of keys to be used in the map. Each key must be unique and non-null." + ), + argument( + name = "value", + description = "For `map`: Expression to be used for value. Can be a constant, column, function, or any combination of arithmetic or string operators.\n\ + For `make_map`: The list of values to be mapped to the corresponding keys." + ) +)] #[derive(Debug)] pub struct MapFunc { signature: Signature, @@ -247,65 +291,10 @@ impl ScalarUDFImpl for MapFunc { } fn documentation(&self) -> Option<&Documentation> { - Some(get_map_doc()) + self.doc() } } -static DOCUMENTATION: OnceLock = OnceLock::new(); - -fn get_map_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_MAP, - "Returns an Arrow map with the specified key-value pairs.\n\n\ - The `make_map` function creates a map from two lists: one for keys and one for values. Each key must be unique and non-null.", - - "map(key, value)\nmap(key: value)\nmake_map(['key1', 'key2'], ['value1', 'value2'])" - ) - .with_sql_example( - r#" -```sql --- Using map function -SELECT MAP('type', 'test'); ----- -{type: test} - -SELECT MAP(['POST', 'HEAD', 'PATCH'], [41, 33, null]); ----- -{POST: 41, HEAD: 33, PATCH: } - -SELECT MAP([[1,2], [3,4]], ['a', 'b']); ----- -{[1, 2]: a, [3, 4]: b} - -SELECT MAP { 'a': 1, 'b': 2 }; ----- -{a: 1, b: 2} - --- Using make_map function -SELECT MAKE_MAP(['POST', 'HEAD'], [41, 33]); ----- -{POST: 41, HEAD: 33} - -SELECT MAKE_MAP(['key1', 'key2'], ['value1', null]); ----- -{key1: value1, key2: } -```"#, - ) - .with_argument( - "key", - "For `map`: Expression to be used for key. Can be a constant, column, function, or any combination of arithmetic or string operators.\n\ - For `make_map`: The list of keys to be used in the map. Each key must be unique and non-null." - ) - .with_argument( - "value", - "For `map`: Expression to be used for value. Can be a constant, column, function, or any combination of arithmetic or string operators.\n\ - For `make_map`: The list of values to be mapped to the corresponding keys." - ) - .build() - }) -} - fn get_element_type(data_type: &DataType) -> Result<&DataType> { match data_type { DataType::List(element) => Ok(element.data_type()), diff --git a/datafusion/functions-nested/src/map_extract.rs b/datafusion/functions-nested/src/map_extract.rs index 24f396e741b2..1ade3f67c973 100644 --- a/datafusion/functions-nested/src/map_extract.rs +++ b/datafusion/functions-nested/src/map_extract.rs @@ -26,12 +26,12 @@ use arrow_buffer::OffsetBuffer; use arrow_schema::Field; use datafusion_common::{cast::as_map_array, exec_err, Result}; -use datafusion_expr::scalar_doc_sections::DOC_SECTION_MAP; use datafusion_expr::{ ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, }; +use datafusion_macros::user_doc; use std::any::Any; -use std::sync::{Arc, OnceLock}; +use std::sync::Arc; use std::vec; use crate::utils::{get_map_entry_field, make_scalar_function}; @@ -45,6 +45,32 @@ make_udf_expr_and_func!( map_extract_udf ); +#[user_doc( + doc_section(label = "Map Functions"), + description = "Returns a list containing the value for the given key or an empty list if the key is not present in the map.", + syntax_example = "map_extract(map, key)", + sql_example = r#"```sql +SELECT map_extract(MAP {'a': 1, 'b': NULL, 'c': 3}, 'a'); +---- +[1] + +SELECT map_extract(MAP {1: 'one', 2: 'two'}, 2); +---- +['two'] + +SELECT map_extract(MAP {'x': 10, 'y': NULL, 'z': 30}, 'y'); +---- +[] +```"#, + argument( + name = "map", + description = "Map expression. Can be a constant, column, or function, and any combination of map operators." + ), + argument( + name = "key", + description = "Key to extract from the map. Can be a constant, column, or function, any combination of arithmetic or string operators, or a named expression of the previously listed." + ) +)] #[derive(Debug)] pub(super) struct MapExtract { signature: Signature, @@ -109,45 +135,10 @@ impl ScalarUDFImpl for MapExtract { } fn documentation(&self) -> Option<&Documentation> { - Some(get_map_extract_doc()) + self.doc() } } -static DOCUMENTATION: OnceLock = OnceLock::new(); - -fn get_map_extract_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_MAP, - "Returns a list containing the value for the given key or an empty list if the key is not present in the map.", - "map_extract(map, key)") - .with_sql_example( - r#"```sql -SELECT map_extract(MAP {'a': 1, 'b': NULL, 'c': 3}, 'a'); ----- -[1] - -SELECT map_extract(MAP {1: 'one', 2: 'two'}, 2); ----- -['two'] - -SELECT map_extract(MAP {'x': 10, 'y': NULL, 'z': 30}, 'y'); ----- -[] -```"#, - ) - .with_argument( - "map", - "Map expression. Can be a constant, column, or function, and any combination of map operators.", - ) - .with_argument( - "key", - "Key to extract from the map. Can be a constant, column, or function, any combination of arithmetic or string operators, or a named expression of the previously listed.", - ) - .build() - }) -} - fn general_map_extract_inner( map_array: &MapArray, query_keys_array: &dyn Array, diff --git a/datafusion/functions-nested/src/map_keys.rs b/datafusion/functions-nested/src/map_keys.rs index 1d19cb8492f0..d3afce3e402e 100644 --- a/datafusion/functions-nested/src/map_keys.rs +++ b/datafusion/functions-nested/src/map_keys.rs @@ -21,13 +21,13 @@ use crate::utils::{get_map_entry_field, make_scalar_function}; use arrow_array::{Array, ArrayRef, ListArray}; use arrow_schema::{DataType, Field}; use datafusion_common::{cast::as_map_array, exec_err, Result}; -use datafusion_expr::scalar_doc_sections::DOC_SECTION_MAP; use datafusion_expr::{ ArrayFunctionSignature, ColumnarValue, Documentation, ScalarUDFImpl, Signature, TypeSignature, Volatility, }; +use datafusion_macros::user_doc; use std::any::Any; -use std::sync::{Arc, OnceLock}; +use std::sync::Arc; make_udf_expr_and_func!( MapKeysFunc, @@ -37,6 +37,24 @@ make_udf_expr_and_func!( map_keys_udf ); +#[user_doc( + doc_section(label = "Map Functions"), + description = "Returns a list of all keys in the map.", + syntax_example = "map_keys(map)", + sql_example = r#"```sql +SELECT map_keys(MAP {'a': 1, 'b': NULL, 'c': 3}); +---- +[a, b, c] + +SELECT map_keys(map([100, 5], [42, 43])); +---- +[100, 5] +```"#, + argument( + name = "map", + description = "Map expression. Can be a constant, column, or function, and any combination of map operators." + ) +)] #[derive(Debug)] pub(crate) struct MapKeysFunc { signature: Signature, @@ -87,37 +105,10 @@ impl ScalarUDFImpl for MapKeysFunc { } fn documentation(&self) -> Option<&Documentation> { - Some(get_map_keys_doc()) + self.doc() } } -static DOCUMENTATION: OnceLock = OnceLock::new(); - -fn get_map_keys_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_MAP, - "Returns a list of all keys in the map.", - "map_keys(map)") - .with_sql_example( - r#"```sql -SELECT map_keys(MAP {'a': 1, 'b': NULL, 'c': 3}); ----- -[a, b, c] - -SELECT map_keys(map([100, 5], [42, 43])); ----- -[100, 5] -```"#, - ) - .with_argument( - "map", - "Map expression. Can be a constant, column, or function, and any combination of map operators." - ) - .build() - }) -} - fn map_keys_inner(args: &[ArrayRef]) -> Result { if args.len() != 1 { return exec_err!("map_keys expects single argument"); diff --git a/datafusion/functions-nested/src/map_values.rs b/datafusion/functions-nested/src/map_values.rs index 816ebe74aff0..fb3aec009f50 100644 --- a/datafusion/functions-nested/src/map_values.rs +++ b/datafusion/functions-nested/src/map_values.rs @@ -21,13 +21,13 @@ use crate::utils::{get_map_entry_field, make_scalar_function}; use arrow_array::{Array, ArrayRef, ListArray}; use arrow_schema::{DataType, Field}; use datafusion_common::{cast::as_map_array, exec_err, Result}; -use datafusion_expr::scalar_doc_sections::DOC_SECTION_MAP; use datafusion_expr::{ ArrayFunctionSignature, ColumnarValue, Documentation, ScalarUDFImpl, Signature, TypeSignature, Volatility, }; +use datafusion_macros::user_doc; use std::any::Any; -use std::sync::{Arc, OnceLock}; +use std::sync::Arc; make_udf_expr_and_func!( MapValuesFunc, @@ -37,6 +37,24 @@ make_udf_expr_and_func!( map_values_udf ); +#[user_doc( + doc_section(label = "Map Functions"), + description = "Returns a list of all values in the map.", + syntax_example = "map_values(map)", + sql_example = r#"```sql +SELECT map_values(MAP {'a': 1, 'b': NULL, 'c': 3}); +---- +[1, , 3] + +SELECT map_values(map([100, 5], [42, 43])); +---- +[42, 43] +```"#, + argument( + name = "map", + description = "Map expression. Can be a constant, column, or function, and any combination of map operators." + ) +)] #[derive(Debug)] pub(crate) struct MapValuesFunc { signature: Signature, @@ -87,38 +105,10 @@ impl ScalarUDFImpl for MapValuesFunc { } fn documentation(&self) -> Option<&Documentation> { - Some(get_map_values_doc()) + self.doc() } } -static DOCUMENTATION: OnceLock = OnceLock::new(); - -fn get_map_values_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_MAP, - "Returns a list of all values in the map.", - - "map_values(map)") - .with_sql_example( - r#"```sql -SELECT map_values(MAP {'a': 1, 'b': NULL, 'c': 3}); ----- -[1, , 3] - -SELECT map_values(map([100, 5], [42, 43])); ----- -[42, 43] -```"#, - ) - .with_argument( - "map", - "Map expression. Can be a constant, column, or function, and any combination of map operators." - ) - .build() - }) -} - fn map_values_inner(args: &[ArrayRef]) -> Result { if args.len() != 1 { return exec_err!("map_values expects single argument"); diff --git a/datafusion/functions-nested/src/position.rs b/datafusion/functions-nested/src/position.rs index feacc7006192..f56fdf734c9c 100644 --- a/datafusion/functions-nested/src/position.rs +++ b/datafusion/functions-nested/src/position.rs @@ -19,12 +19,13 @@ use arrow_schema::DataType::{LargeList, List, UInt64}; use arrow_schema::{DataType, Field}; -use datafusion_expr::scalar_doc_sections::DOC_SECTION_ARRAY; use datafusion_expr::{ ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, }; +use datafusion_macros::user_doc; + use std::any::Any; -use std::sync::{Arc, OnceLock}; +use std::sync::Arc; use arrow_array::types::UInt64Type; use arrow_array::{ @@ -46,6 +47,34 @@ make_udf_expr_and_func!( array_position_udf ); +#[user_doc( + doc_section(label = "Array Functions"), + description = "Returns the position of the first occurrence of the specified element in the array.", + syntax_example = "array_position(array, element)\narray_position(array, element, index)", + sql_example = r#"```sql +> select array_position([1, 2, 2, 3, 1, 4], 2); ++----------------------------------------------+ +| array_position(List([1,2,2,3,1,4]),Int64(2)) | ++----------------------------------------------+ +| 2 | ++----------------------------------------------+ +> select array_position([1, 2, 2, 3, 1, 4], 2, 3); ++----------------------------------------------------+ +| array_position(List([1,2,2,3,1,4]),Int64(2), Int64(3)) | ++----------------------------------------------------+ +| 3 | ++----------------------------------------------------+ +```"#, + argument( + name = "array", + description = "Array expression. Can be a constant, column, or function, and any combination of array operators." + ), + argument( + name = "element", + description = "Element to search for position in the array." + ), + argument(name = "index", description = "Index at which to start searching.") +)] #[derive(Debug)] pub(super) struct ArrayPosition { signature: Signature, @@ -95,51 +124,10 @@ impl ScalarUDFImpl for ArrayPosition { } fn documentation(&self) -> Option<&Documentation> { - Some(get_array_position_doc()) + self.doc() } } -static DOCUMENTATION: OnceLock = OnceLock::new(); - -fn get_array_position_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_ARRAY, - "Returns the position of the first occurrence of the specified element in the array.", - - "array_position(array, element)\narray_position(array, element, index)") - .with_sql_example( - r#"```sql -> select array_position([1, 2, 2, 3, 1, 4], 2); -+----------------------------------------------+ -| array_position(List([1,2,2,3,1,4]),Int64(2)) | -+----------------------------------------------+ -| 2 | -+----------------------------------------------+ -> select array_position([1, 2, 2, 3, 1, 4], 2, 3); -+----------------------------------------------------+ -| array_position(List([1,2,2,3,1,4]),Int64(2), Int64(3)) | -+----------------------------------------------------+ -| 3 | -+----------------------------------------------------+ -```"#, - ) - .with_argument( - "array", - "Array expression. Can be a constant, column, or function, and any combination of array operators.", - ) - .with_argument( - "element", - "Element to search for position in the array.", - ) - .with_argument( - "index", - "Index at which to start searching.", - ) - .build() - }) -} - /// Array_position SQL function pub fn array_position_inner(args: &[ArrayRef]) -> Result { if args.len() < 2 || args.len() > 3 { @@ -224,6 +212,28 @@ make_udf_expr_and_func!( "searches for an element in the array, returns all occurrences.", // doc array_positions_udf // internal function name ); + +#[user_doc( + doc_section(label = "Array Functions"), + description = "Searches for an element in the array, returns all occurrences.", + syntax_example = "array_positions(array, element)", + sql_example = r#"```sql +> select array_positions([1, 2, 2, 3, 1, 4], 2); ++-----------------------------------------------+ +| array_positions(List([1,2,2,3,1,4]),Int64(2)) | ++-----------------------------------------------+ +| [2, 3] | ++-----------------------------------------------+ +```"#, + argument( + name = "array", + description = "Array expression. Can be a constant, column, or function, and any combination of array operators." + ), + argument( + name = "element", + description = "Element to search for position in the array." + ) +)] #[derive(Debug)] pub(super) struct ArrayPositions { signature: Signature, @@ -268,39 +278,10 @@ impl ScalarUDFImpl for ArrayPositions { } fn documentation(&self) -> Option<&Documentation> { - Some(get_array_positions_doc()) + self.doc() } } -fn get_array_positions_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_ARRAY, - "Searches for an element in the array, returns all occurrences.", - - "array_positions(array, element)") - .with_sql_example( - r#"```sql -> select array_positions([1, 2, 2, 3, 1, 4], 2); -+-----------------------------------------------+ -| array_positions(List([1,2,2,3,1,4]),Int64(2)) | -+-----------------------------------------------+ -| [2, 3] | -+-----------------------------------------------+ -```"#, - ) - .with_argument( - "array", - "Array expression. Can be a constant, column, or function, and any combination of array operators.", - ) - .with_argument( - "element", - "Element to search for positions in the array.", - ) - .build() - }) -} - /// Array_positions SQL function pub fn array_positions_inner(args: &[ArrayRef]) -> Result { if args.len() != 2 { diff --git a/docs/source/user-guide/sql/scalar_functions.md b/docs/source/user-guide/sql/scalar_functions.md index 4cf5ff4b7142..f17e7189a948 100644 --- a/docs/source/user-guide/sql/scalar_functions.md +++ b/docs/source/user-guide/sql/scalar_functions.md @@ -3122,34 +3122,26 @@ array_position(array, element, index) ### `array_positions` -Returns the position of the first occurrence of the specified element in the array. +Searches for an element in the array, returns all occurrences. ``` -array_position(array, element) -array_position(array, element, index) +array_positions(array, element) ``` #### Arguments - **array**: Array expression. Can be a constant, column, or function, and any combination of array operators. - **element**: Element to search for position in the array. -- **index**: Index at which to start searching. #### Example ```sql -> select array_position([1, 2, 2, 3, 1, 4], 2); -+----------------------------------------------+ -| array_position(List([1,2,2,3,1,4]),Int64(2)) | -+----------------------------------------------+ -| 2 | -+----------------------------------------------+ -> select array_position([1, 2, 2, 3, 1, 4], 2, 3); -+----------------------------------------------------+ -| array_position(List([1,2,2,3,1,4]),Int64(2), Int64(3)) | -+----------------------------------------------------+ -| 3 | -+----------------------------------------------------+ +> select array_positions([1, 2, 2, 3, 1, 4], 2); ++-----------------------------------------------+ +| array_positions(List([1,2,2,3,1,4]),Int64(2)) | ++-----------------------------------------------+ +| [2, 3] | ++-----------------------------------------------+ ``` #### Aliases