diff --git a/datafusion/expr/src/built_in_function.rs b/datafusion/expr/src/built_in_function.rs index bc1bcd9f6861..fe86b7a15338 100644 --- a/datafusion/expr/src/built_in_function.rs +++ b/datafusion/expr/src/built_in_function.rs @@ -119,8 +119,6 @@ pub enum BuiltinScalarFunction { ArrayReplaceN, /// array_replace_all ArrayReplaceAll, - /// array_reverse - ArrayReverse, /// array_intersect ArrayIntersect, /// array_union @@ -289,7 +287,6 @@ impl BuiltinScalarFunction { BuiltinScalarFunction::ArrayReplace => Volatility::Immutable, BuiltinScalarFunction::ArrayReplaceN => Volatility::Immutable, BuiltinScalarFunction::ArrayReplaceAll => Volatility::Immutable, - BuiltinScalarFunction::ArrayReverse => Volatility::Immutable, BuiltinScalarFunction::ArrayIntersect => Volatility::Immutable, BuiltinScalarFunction::ArrayUnion => Volatility::Immutable, BuiltinScalarFunction::Ascii => Volatility::Immutable, @@ -359,7 +356,6 @@ impl BuiltinScalarFunction { BuiltinScalarFunction::ArrayReplace => Ok(input_expr_types[0].clone()), BuiltinScalarFunction::ArrayReplaceN => Ok(input_expr_types[0].clone()), BuiltinScalarFunction::ArrayReplaceAll => Ok(input_expr_types[0].clone()), - BuiltinScalarFunction::ArrayReverse => Ok(input_expr_types[0].clone()), BuiltinScalarFunction::ArrayIntersect => { match (input_expr_types[0].clone(), input_expr_types[1].clone()) { (DataType::Null, DataType::Null) | (DataType::Null, _) => { @@ -557,7 +553,6 @@ impl BuiltinScalarFunction { BuiltinScalarFunction::ArrayReplaceAll => { Signature::any(3, self.volatility()) } - BuiltinScalarFunction::ArrayReverse => Signature::any(1, self.volatility()), BuiltinScalarFunction::ArrayIntersect => Signature::any(2, self.volatility()), BuiltinScalarFunction::ArrayUnion => Signature::any(2, self.volatility()), @@ -882,7 +877,6 @@ impl BuiltinScalarFunction { BuiltinScalarFunction::ArrayReplaceAll => { &["array_replace_all", "list_replace_all"] } - BuiltinScalarFunction::ArrayReverse => &["array_reverse", "list_reverse"], BuiltinScalarFunction::ArrayUnion => &["array_union", "list_union"], BuiltinScalarFunction::ArrayIntersect => { &["array_intersect", "list_intersect"] diff --git a/datafusion/expr/src/expr_fn.rs b/datafusion/expr/src/expr_fn.rs index 538a8a75ce5e..d37d0211491d 100644 --- a/datafusion/expr/src/expr_fn.rs +++ b/datafusion/expr/src/expr_fn.rs @@ -638,12 +638,6 @@ scalar_expr!( array from to, "replaces all occurrences of the specified element with another specified element." ); -scalar_expr!( - ArrayReverse, - array_reverse, - array, - "reverses the order of elements in the array." -); scalar_expr!(ArrayUnion, array_union, array1 array2, "returns an array of the elements in the union of array1 and array2 without duplicates."); scalar_expr!( diff --git a/datafusion/functions-array/src/kernels.rs b/datafusion/functions-array/src/kernels.rs index df35a9c29b10..53c8021fc15e 100644 --- a/datafusion/functions-array/src/kernels.rs +++ b/datafusion/functions-array/src/kernels.rs @@ -1202,3 +1202,72 @@ pub fn general_array_distinct( None, )?)) } + +/// array_reverse SQL function +pub fn array_reverse(arg: &[ArrayRef]) -> Result { + if arg.len() != 1 { + return exec_err!("array_reverse needs one argument"); + } + + match &arg[0].data_type() { + DataType::List(field) => { + let array = as_list_array(&arg[0])?; + general_array_reverse::(array, field) + } + DataType::LargeList(field) => { + let array = as_large_list_array(&arg[0])?; + general_array_reverse::(array, field) + } + DataType::Null => Ok(arg[0].clone()), + array_type => exec_err!("array_reverse does not support type '{array_type:?}'."), + } +} + +fn general_array_reverse( + array: &GenericListArray, + field: &FieldRef, +) -> Result +where + O: TryFrom, +{ + let values = array.values(); + let original_data = values.to_data(); + let capacity = Capacities::Array(original_data.len()); + let mut offsets = vec![O::usize_as(0)]; + let mut nulls = vec![]; + let mut mutable = + MutableArrayData::with_capacities(vec![&original_data], false, capacity); + + for (row_index, offset_window) in array.offsets().windows(2).enumerate() { + // skip the null value + if array.is_null(row_index) { + nulls.push(false); + offsets.push(offsets[row_index] + O::one()); + mutable.extend(0, 0, 1); + continue; + } else { + nulls.push(true); + } + + let start = offset_window[0]; + let end = offset_window[1]; + + let mut index = end - O::one(); + let mut cnt = 0; + + while index >= start { + mutable.extend(0, index.to_usize().unwrap(), index.to_usize().unwrap() + 1); + index = index - O::one(); + cnt += 1; + } + offsets.push(offsets[row_index] + O::usize_as(cnt)); + } + + let data = mutable.freeze(); + Ok(Arc::new(GenericListArray::::try_new( + field.clone(), + OffsetBuffer::::new(offsets.into()), + arrow_array::make_array(data), + Some(nulls.into()), + )?)) +} diff --git a/datafusion/functions-array/src/lib.rs b/datafusion/functions-array/src/lib.rs index e982290f66c0..1e0265e53c03 100644 --- a/datafusion/functions-array/src/lib.rs +++ b/datafusion/functions-array/src/lib.rs @@ -63,6 +63,7 @@ pub mod expr_fn { pub use super::udf::array_ndims; pub use super::udf::array_repeat; pub use super::udf::array_resize; + pub use super::udf::array_reverse; pub use super::udf::array_sort; pub use super::udf::array_to_string; pub use super::udf::cardinality; @@ -100,6 +101,7 @@ pub fn register_all(registry: &mut dyn FunctionRegistry) -> Result<()> { udf::array_distinct_udf(), udf::array_repeat_udf(), udf::array_resize_udf(), + udf::array_reverse_udf(), ]; functions.into_iter().try_for_each(|udf| { let existing_udf = registry.register_udf(udf)?; diff --git a/datafusion/functions-array/src/udf.rs b/datafusion/functions-array/src/udf.rs index 0e0efa185fc1..64305cf1e587 100644 --- a/datafusion/functions-array/src/udf.rs +++ b/datafusion/functions-array/src/udf.rs @@ -882,3 +882,52 @@ impl ScalarUDFImpl for crate::udf::ArrayDistinct { &self.aliases } } + +make_udf_function!( + ArrayReverse, + array_reverse, + array, + "reverses the order of elements in the array.", + array_reverse_udf +); + +#[derive(Debug)] +pub(super) struct ArrayReverse { + signature: Signature, + aliases: Vec, +} + +impl crate::udf::ArrayReverse { + pub fn new() -> Self { + Self { + signature: Signature::any(1, Volatility::Immutable), + aliases: vec!["array_reverse".to_string(), "list_reverse".to_string()], + } + } +} + +impl ScalarUDFImpl for crate::udf::ArrayReverse { + fn as_any(&self) -> &dyn Any { + self + } + fn name(&self) -> &str { + "array_reserse" + } + + fn signature(&self) -> &Signature { + &self.signature + } + + fn return_type(&self, arg_types: &[DataType]) -> Result { + Ok(arg_types[0].clone()) + } + + fn invoke(&self, args: &[ColumnarValue]) -> Result { + let args = ColumnarValue::values_to_arrays(args)?; + crate::kernels::array_reverse(&args).map(ColumnarValue::Array) + } + + fn aliases(&self) -> &[String] { + &self.aliases + } +} diff --git a/datafusion/physical-expr/src/array_expressions.rs b/datafusion/physical-expr/src/array_expressions.rs index 78bfe09e37d9..ca19d3052cc8 100644 --- a/datafusion/physical-expr/src/array_expressions.rs +++ b/datafusion/physical-expr/src/array_expressions.rs @@ -990,72 +990,3 @@ pub fn general_array_distinct( None, )?)) } - -/// array_reverse SQL function -pub fn array_reverse(arg: &[ArrayRef]) -> Result { - if arg.len() != 1 { - return exec_err!("array_reverse needs one argument"); - } - - match &arg[0].data_type() { - DataType::List(field) => { - let array = as_list_array(&arg[0])?; - general_array_reverse::(array, field) - } - DataType::LargeList(field) => { - let array = as_large_list_array(&arg[0])?; - general_array_reverse::(array, field) - } - DataType::Null => Ok(arg[0].clone()), - array_type => exec_err!("array_reverse does not support type '{array_type:?}'."), - } -} - -fn general_array_reverse( - array: &GenericListArray, - field: &FieldRef, -) -> Result -where - O: TryFrom, -{ - let values = array.values(); - let original_data = values.to_data(); - let capacity = Capacities::Array(original_data.len()); - let mut offsets = vec![O::usize_as(0)]; - let mut nulls = vec![]; - let mut mutable = - MutableArrayData::with_capacities(vec![&original_data], false, capacity); - - for (row_index, offset_window) in array.offsets().windows(2).enumerate() { - // skip the null value - if array.is_null(row_index) { - nulls.push(false); - offsets.push(offsets[row_index] + O::one()); - mutable.extend(0, 0, 1); - continue; - } else { - nulls.push(true); - } - - let start = offset_window[0]; - let end = offset_window[1]; - - let mut index = end - O::one(); - let mut cnt = 0; - - while index >= start { - mutable.extend(0, index.to_usize().unwrap(), index.to_usize().unwrap() + 1); - index = index - O::one(); - cnt += 1; - } - offsets.push(offsets[row_index] + O::usize_as(cnt)); - } - - let data = mutable.freeze(); - Ok(Arc::new(GenericListArray::::try_new( - field.clone(), - OffsetBuffer::::new(offsets.into()), - arrow_array::make_array(data), - Some(nulls.into()), - )?)) -} diff --git a/datafusion/physical-expr/src/functions.rs b/datafusion/physical-expr/src/functions.rs index e05cabc18b3a..f0eab1c857b7 100644 --- a/datafusion/physical-expr/src/functions.rs +++ b/datafusion/physical-expr/src/functions.rs @@ -282,9 +282,6 @@ pub fn create_physical_fun( BuiltinScalarFunction::ArrayReplaceAll => Arc::new(|args| { make_scalar_function_inner(array_expressions::array_replace_all)(args) }), - BuiltinScalarFunction::ArrayReverse => Arc::new(|args| { - make_scalar_function_inner(array_expressions::array_reverse)(args) - }), BuiltinScalarFunction::ArrayIntersect => Arc::new(|args| { make_scalar_function_inner(array_expressions::array_intersect)(args) }), diff --git a/datafusion/proto/proto/datafusion.proto b/datafusion/proto/proto/datafusion.proto index 04a8c5568322..3754e60c3cce 100644 --- a/datafusion/proto/proto/datafusion.proto +++ b/datafusion/proto/proto/datafusion.proto @@ -661,23 +661,23 @@ enum ScalarFunction { ArrayIntersect = 119; ArrayUnion = 120; OverLay = 121; - /// 122 is Range + // 122 is Range ArrayExcept = 123; // 124 was ArrayPopFront Levenshtein = 125; SubstrIndex = 126; FindInSet = 127; - /// 128 was ArraySort - /// 129 was ArrayDistinct - /// 130 was ArrayResize + // 128 was ArraySort + // 129 was ArrayDistinct + // 130 was ArrayResize EndsWith = 131; - /// 132 was InStr - /// 133 was MakeDate - ArrayReverse = 134; - /// 135 is RegexpLike - /// 136 was ToChar - /// 137 was ToDate - /// 138 was ToUnixtime + // 132 was InStr + // 133 was MakeDate + // 134 was ArrayReverse + // 135 is RegexpLike + // 136 was ToChar + // 137 was ToDate + // 138 was ToUnixtime } message ScalarFunctionNode { diff --git a/datafusion/proto/src/generated/pbjson.rs b/datafusion/proto/src/generated/pbjson.rs index 6b64c87122a6..ba619cb81d2f 100644 --- a/datafusion/proto/src/generated/pbjson.rs +++ b/datafusion/proto/src/generated/pbjson.rs @@ -22970,7 +22970,6 @@ impl serde::Serialize for ScalarFunction { Self::SubstrIndex => "SubstrIndex", Self::FindInSet => "FindInSet", Self::EndsWith => "EndsWith", - Self::ArrayReverse => "ArrayReverse", }; serializer.serialize_str(variant) } @@ -23060,7 +23059,6 @@ impl<'de> serde::Deserialize<'de> for ScalarFunction { "SubstrIndex", "FindInSet", "EndsWith", - "ArrayReverse", ]; struct GeneratedVisitor; @@ -23179,7 +23177,6 @@ impl<'de> serde::Deserialize<'de> for ScalarFunction { "SubstrIndex" => Ok(ScalarFunction::SubstrIndex), "FindInSet" => Ok(ScalarFunction::FindInSet), "EndsWith" => Ok(ScalarFunction::EndsWith), - "ArrayReverse" => Ok(ScalarFunction::ArrayReverse), _ => Err(serde::de::Error::unknown_variant(value, FIELDS)), } } diff --git a/datafusion/proto/src/generated/prost.rs b/datafusion/proto/src/generated/prost.rs index 4606b5ea2815..c091ffa35f94 100644 --- a/datafusion/proto/src/generated/prost.rs +++ b/datafusion/proto/src/generated/prost.rs @@ -2934,24 +2934,24 @@ pub enum ScalarFunction { ArrayIntersect = 119, ArrayUnion = 120, OverLay = 121, - /// / 122 is Range + /// 122 is Range ArrayExcept = 123, /// 124 was ArrayPopFront Levenshtein = 125, SubstrIndex = 126, FindInSet = 127, - /// / 128 was ArraySort - /// / 129 was ArrayDistinct - /// / 130 was ArrayResize - EndsWith = 131, - /// / 132 was InStr - /// / 133 was MakeDate + /// 128 was ArraySort + /// 129 was ArrayDistinct + /// 130 was ArrayResize /// - /// / 135 is RegexpLike - /// / 136 was ToChar - /// / 137 was ToDate - /// / 138 was ToUnixtime - ArrayReverse = 134, + /// 132 was InStr + /// 133 was MakeDate + /// 134 was ArrayReverse + /// 135 is RegexpLike + /// 136 was ToChar + /// 137 was ToDate + /// 138 was ToUnixtime + EndsWith = 131, } impl ScalarFunction { /// String value of the enum field names used in the ProtoBuf definition. @@ -3038,7 +3038,6 @@ impl ScalarFunction { ScalarFunction::SubstrIndex => "SubstrIndex", ScalarFunction::FindInSet => "FindInSet", ScalarFunction::EndsWith => "EndsWith", - ScalarFunction::ArrayReverse => "ArrayReverse", } } /// Creates an enum from field names used in the ProtoBuf definition. @@ -3122,7 +3121,6 @@ impl ScalarFunction { "SubstrIndex" => Some(Self::SubstrIndex), "FindInSet" => Some(Self::FindInSet), "EndsWith" => Some(Self::EndsWith), - "ArrayReverse" => Some(Self::ArrayReverse), _ => None, } } diff --git a/datafusion/proto/src/logical_plan/from_proto.rs b/datafusion/proto/src/logical_plan/from_proto.rs index 41824f7a028d..8699daadd49d 100644 --- a/datafusion/proto/src/logical_plan/from_proto.rs +++ b/datafusion/proto/src/logical_plan/from_proto.rs @@ -45,6 +45,7 @@ use datafusion_common::{ Result, ScalarValue, }; use datafusion_expr::expr::Unnest; +use datafusion_expr::expr::{Alias, Placeholder}; use datafusion_expr::window_frame::{check_window_frame, regularize_window_order_by}; use datafusion_expr::{ acosh, array_except, array_intersect, array_position, array_positions, array_remove, @@ -66,10 +67,6 @@ use datafusion_expr::{ JoinConstraint, JoinType, Like, Operator, TryCast, WindowFrame, WindowFrameBound, WindowFrameUnits, }; -use datafusion_expr::{ - array_reverse, - expr::{Alias, Placeholder}, -}; use super::LogicalExtensionCodec; @@ -480,7 +477,6 @@ impl From<&protobuf::ScalarFunction> for BuiltinScalarFunction { ScalarFunction::ArrayReplace => Self::ArrayReplace, ScalarFunction::ArrayReplaceN => Self::ArrayReplaceN, ScalarFunction::ArrayReplaceAll => Self::ArrayReplaceAll, - ScalarFunction::ArrayReverse => Self::ArrayReverse, ScalarFunction::ArrayIntersect => Self::ArrayIntersect, ScalarFunction::ArrayUnion => Self::ArrayUnion, ScalarFunction::Log2 => Self::Log2, @@ -1422,9 +1418,6 @@ pub fn parse_expr( parse_expr(&args[1], registry, codec)?, parse_expr(&args[2], registry, codec)?, )), - ScalarFunction::ArrayReverse => { - Ok(array_reverse(parse_expr(&args[0], registry, codec)?)) - } ScalarFunction::ArrayUnion => Ok(array_union( parse_expr(&args[0], registry, codec)?, parse_expr(&args[1], registry, codec)?, diff --git a/datafusion/proto/src/logical_plan/to_proto.rs b/datafusion/proto/src/logical_plan/to_proto.rs index 196e02c93e67..bbf842712e34 100644 --- a/datafusion/proto/src/logical_plan/to_proto.rs +++ b/datafusion/proto/src/logical_plan/to_proto.rs @@ -1462,7 +1462,6 @@ impl TryFrom<&BuiltinScalarFunction> for protobuf::ScalarFunction { BuiltinScalarFunction::ArrayReplace => Self::ArrayReplace, BuiltinScalarFunction::ArrayReplaceN => Self::ArrayReplaceN, BuiltinScalarFunction::ArrayReplaceAll => Self::ArrayReplaceAll, - BuiltinScalarFunction::ArrayReverse => Self::ArrayReverse, BuiltinScalarFunction::ArrayIntersect => Self::ArrayIntersect, BuiltinScalarFunction::ArrayUnion => Self::ArrayUnion, BuiltinScalarFunction::Log2 => Self::Log2, diff --git a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs index 2ce044d36b5b..ac3e0209edb6 100644 --- a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs +++ b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs @@ -576,6 +576,7 @@ async fn roundtrip_expr_api() -> Result<()> { ), array_pop_front(make_array(vec![lit(1), lit(2), lit(3)])), array_pop_back(make_array(vec![lit(1), lit(2), lit(3)])), + array_reverse(make_array(vec![lit(1), lit(2), lit(3)])), ]; // ensure expressions created with the expr api can be round tripped