From e37ac35c435d37935c92dffafb0bf7b45d037bf0 Mon Sep 17 00:00:00 2001 From: Alex Huang Date: Mon, 18 Mar 2024 10:45:36 +0800 Subject: [PATCH] improve array expression doc and clean up array_expression.rs (#9650) --- .../physical-expr/src/array_expressions.rs | 70 ------------------- .../source/user-guide/sql/scalar_functions.md | 49 +++++++++++++ 2 files changed, 49 insertions(+), 70 deletions(-) diff --git a/datafusion/physical-expr/src/array_expressions.rs b/datafusion/physical-expr/src/array_expressions.rs index 8cb5d868067c..f53ef954d0dd 100644 --- a/datafusion/physical-expr/src/array_expressions.rs +++ b/datafusion/physical-expr/src/array_expressions.rs @@ -24,7 +24,6 @@ use arrow::buffer::OffsetBuffer; use arrow::datatypes::{DataType, Field}; use arrow_buffer::NullBuffer; -use arrow_schema::FieldRef; use datafusion_common::cast::{as_int64_array, as_large_list_array, as_list_array}; use datafusion_common::utils::array_into_list_array; use datafusion_common::{exec_err, plan_err, Result}; @@ -561,72 +560,3 @@ pub fn array_replace_all(args: &[ArrayRef]) -> Result { } } } - -/// array_reverse SQL function -pub fn array_reverse(arg: &[ArrayRef]) -> Result { - if arg.len() != 1 { - return exec_err!("array_reverse needs one argument"); - } - - match &arg[0].data_type() { - DataType::List(field) => { - let array = as_list_array(&arg[0])?; - general_array_reverse::(array, field) - } - DataType::LargeList(field) => { - let array = as_large_list_array(&arg[0])?; - general_array_reverse::(array, field) - } - DataType::Null => Ok(arg[0].clone()), - array_type => exec_err!("array_reverse does not support type '{array_type:?}'."), - } -} - -fn general_array_reverse( - array: &GenericListArray, - field: &FieldRef, -) -> Result -where - O: TryFrom, -{ - let values = array.values(); - let original_data = values.to_data(); - let capacity = Capacities::Array(original_data.len()); - let mut offsets = vec![O::usize_as(0)]; - let mut nulls = vec![]; - let mut mutable = - 
MutableArrayData::with_capacities(vec![&original_data], false, capacity); - - for (row_index, offset_window) in array.offsets().windows(2).enumerate() { - // skip the null value - if array.is_null(row_index) { - nulls.push(false); - offsets.push(offsets[row_index] + O::one()); - mutable.extend(0, 0, 1); - continue; - } else { - nulls.push(true); - } - - let start = offset_window[0]; - let end = offset_window[1]; - - let mut index = end - O::one(); - let mut cnt = 0; - - while index >= start { - mutable.extend(0, index.to_usize().unwrap(), index.to_usize().unwrap() + 1); - index = index - O::one(); - cnt += 1; - } - offsets.push(offsets[row_index] + O::usize_as(cnt)); - } - - let data = mutable.freeze(); - Ok(Arc::new(GenericListArray::::try_new( - field.clone(), - OffsetBuffer::::new(offsets.into()), - arrow_array::make_array(data), - Some(nulls.into()), - )?)) -} diff --git a/docs/source/user-guide/sql/scalar_functions.md b/docs/source/user-guide/sql/scalar_functions.md index 9ab460e62067..b63fa9950ae0 100644 --- a/docs/source/user-guide/sql/scalar_functions.md +++ b/docs/source/user-guide/sql/scalar_functions.md @@ -1958,6 +1958,7 @@ from_unixtime(expression) - [array_extract](#array_extract) - [array_fill](#array_fill) - [array_indexof](#array_indexof) +- [array_intersect](#array_intersect) - [array_join](#array_join) - [array_length](#array_length) - [array_ndims](#array_ndims) @@ -1997,6 +1998,7 @@ from_unixtime(expression) - [list_has_all](#list_has_all) - [list_has_any](#list_has_any) - [list_indexof](#list_indexof) +- [list_intersect](#list_intersect) - [list_join](#list_join) - [list_length](#list_length) - [list_ndims](#list_ndims) @@ -2017,6 +2019,7 @@ from_unixtime(expression) - [list_replace_all](#list_replace_all) - [list_slice](#list_slice) - [list_to_string](#list_to_string) +- [list_union](#list_union) - [make_array](#make_array) - [make_list](#make_list) - [string_to_array](#string_to_array) @@ -2340,6 +2343,44 @@ flatten(array) _Alias of 
[array_position](#array_position)._ +### `array_intersect` + +Returns an array of elements in the intersection of array1 and array2. + +``` +array_intersect(array1, array2) +``` + +#### Arguments + +- **array1**: Array expression. + Can be a constant, column, or function, and any combination of array operators. +- **array2**: Array expression. + Can be a constant, column, or function, and any combination of array operators. + +#### Example + +``` +❯ select array_intersect([1, 2, 3, 4], [5, 6, 3, 4]); ++----------------------------------------------------+ +| array_intersect([1, 2, 3, 4], [5, 6, 3, 4]); | ++----------------------------------------------------+ +| [3, 4] | ++----------------------------------------------------+ +❯ select array_intersect([1, 2, 3, 4], [5, 6, 7, 8]); ++----------------------------------------------------+ +| array_intersect([1, 2, 3, 4], [5, 6, 7, 8]); | ++----------------------------------------------------+ +| [] | ++----------------------------------------------------+ +``` + +--- + +#### Aliases + +- list_intersect + ### `array_join` _Alias of [array_to_string](#array_to_string)._ @@ -3068,6 +3109,10 @@ _Alias of [array_has_any](#array_has_any)._ _Alias of [array_position](#array_position)._ +### `list_intersect` + +_Alias of [array_intersect](#array_intersect)._ + ### `list_join` _Alias of [array_to_string](#array_to_string)._ @@ -3152,6 +3197,10 @@ _Alias of [array_slice](#array_slice)._ _Alias of [array_to_string](#array_to_string)._ +### `list_union` + +_Alias of [array_union](#array_union)._ + ### `make_array` Returns an Arrow array using the specified input expressions.