Skip to content

Commit

Permalink
improve array expression doc and clean up array_expression.rs (#9650)
Browse files Browse the repository at this point in the history
  • Loading branch information
Weijun-H authored Mar 18, 2024
1 parent c072abb commit e37ac35
Show file tree
Hide file tree
Showing 2 changed files with 49 additions and 70 deletions.
70 changes: 0 additions & 70 deletions datafusion/physical-expr/src/array_expressions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@ use arrow::buffer::OffsetBuffer;
use arrow::datatypes::{DataType, Field};
use arrow_buffer::NullBuffer;

use arrow_schema::FieldRef;
use datafusion_common::cast::{as_int64_array, as_large_list_array, as_list_array};
use datafusion_common::utils::array_into_list_array;
use datafusion_common::{exec_err, plan_err, Result};
Expand Down Expand Up @@ -561,72 +560,3 @@ pub fn array_replace_all(args: &[ArrayRef]) -> Result<ArrayRef> {
}
}
}

/// Implementation of the `array_reverse` SQL function.
///
/// Expects exactly one argument. `List` and `LargeList` inputs are handed to
/// [`general_array_reverse`] with the matching offset width (`i32` / `i64`);
/// a `Null`-typed input is returned as-is. Any other data type, or a wrong
/// argument count, yields an execution error.
pub fn array_reverse(arg: &[ArrayRef]) -> Result<ArrayRef> {
    // Exactly one argument is accepted.
    let [input] = arg else {
        return exec_err!("array_reverse needs one argument");
    };

    match input.data_type() {
        DataType::List(field) => {
            let list = as_list_array(input)?;
            general_array_reverse::<i32>(list, field)
        }
        DataType::LargeList(field) => {
            let large_list = as_large_list_array(input)?;
            general_array_reverse::<i64>(large_list, field)
        }
        DataType::Null => Ok(input.clone()),
        array_type => exec_err!("array_reverse does not support type '{array_type:?}'."),
    }
}

/// Reverses the elements inside every row of a list array.
///
/// `array` is the list array to process and `field` is the element field used
/// to build the resulting list array. The result has one row per input row:
/// a valid row holds the same child elements in reverse order, and a null
/// input row stays null.
///
/// Null rows are emitted as zero-length slots. No child value is copied for
/// them, so the child values array is never read for a null row (it may be
/// empty, e.g. when every row is null).
///
/// # Errors
///
/// Returns an error if `GenericListArray::try_new` rejects the rebuilt
/// offsets / values / null buffer.
fn general_array_reverse<O: OffsetSizeTrait>(
    array: &GenericListArray<O>,
    field: &FieldRef,
) -> Result<ArrayRef>
where
    O: TryFrom<i64>,
{
    let values = array.values();
    let original_data = values.to_data();
    let capacity = Capacities::Array(original_data.len());
    let mut offsets = vec![O::usize_as(0)];
    let mut nulls = vec![];
    let mut mutable =
        MutableArrayData::with_capacities(vec![&original_data], false, capacity);

    for (row_index, offset_window) in array.offsets().windows(2).enumerate() {
        // `offsets` always holds one more entry than rows processed so far,
        // so `offsets[row_index]` is the end offset of the previous row.
        let previous_end = offsets[row_index];

        if array.is_null(row_index) {
            // A null row contributes no child elements: repeat the previous
            // offset instead of copying a placeholder value.
            nulls.push(false);
            offsets.push(previous_end);
            continue;
        }
        nulls.push(true);

        let start = offset_window[0].as_usize();
        let end = offset_window[1].as_usize();

        // Copy the row's child elements one at a time, last to first.
        for index in (start..end).rev() {
            mutable.extend(0, index, index + 1);
        }
        offsets.push(previous_end + O::usize_as(end - start));
    }

    let data = mutable.freeze();
    Ok(Arc::new(GenericListArray::<O>::try_new(
        field.clone(),
        OffsetBuffer::<O>::new(offsets.into()),
        arrow_array::make_array(data),
        Some(nulls.into()),
    )?))
}
49 changes: 49 additions & 0 deletions docs/source/user-guide/sql/scalar_functions.md
Original file line number Diff line number Diff line change
Expand Up @@ -1958,6 +1958,7 @@ from_unixtime(expression)
- [array_extract](#array_extract)
- [array_fill](#array_fill)
- [array_indexof](#array_indexof)
- [array_intersect](#array_intersect)
- [array_join](#array_join)
- [array_length](#array_length)
- [array_ndims](#array_ndims)
Expand Down Expand Up @@ -1997,6 +1998,7 @@ from_unixtime(expression)
- [list_has_all](#list_has_all)
- [list_has_any](#list_has_any)
- [list_indexof](#list_indexof)
- [list_intersect](#list_intersect)
- [list_join](#list_join)
- [list_length](#list_length)
- [list_ndims](#list_ndims)
Expand All @@ -2017,6 +2019,7 @@ from_unixtime(expression)
- [list_replace_all](#list_replace_all)
- [list_slice](#list_slice)
- [list_to_string](#list_to_string)
- [list_union](#list_union)
- [make_array](#make_array)
- [make_list](#make_list)
- [string_to_array](#string_to_array)
Expand Down Expand Up @@ -2340,6 +2343,44 @@ flatten(array)

_Alias of [array_position](#array_position)._

### `array_intersect`

Returns an array of elements in the intersection of array1 and array2.

```
array_intersect(array1, array2)
```

#### Arguments

- **array1**: Array expression.
Can be a constant, column, or function, and any combination of array operators.
- **array2**: Array expression.
Can be a constant, column, or function, and any combination of array operators.

#### Example

```
❯ select array_intersect([1, 2, 3, 4], [5, 6, 3, 4]);
+----------------------------------------------------+
| array_intersect([1, 2, 3, 4], [5, 6, 3, 4]); |
+----------------------------------------------------+
| [3, 4] |
+----------------------------------------------------+
❯ select array_intersect([1, 2, 3, 4], [5, 6, 7, 8]);
+----------------------------------------------------+
| array_intersect([1, 2, 3, 4], [5, 6, 7, 8]); |
+----------------------------------------------------+
| [] |
+----------------------------------------------------+
```

---

#### Aliases

- list_intersect

### `array_join`

_Alias of [array_to_string](#array_to_string)._
Expand Down Expand Up @@ -3068,6 +3109,10 @@ _Alias of [array_has_any](#array_has_any)._

_Alias of [array_position](#array_position)._

### `list_intersect`

_Alias of [array_intersect](#array_intersect)._

### `list_join`

_Alias of [array_to_string](#array_to_string)._
Expand Down Expand Up @@ -3152,6 +3197,10 @@ _Alias of [array_slice](#array_slice)._

_Alias of [array_to_string](#array_to_string)._

### `list_union`

_Alias of [array_union](#array_union)._

### `make_array`

Returns an Arrow array using the specified input expressions.
Expand Down

0 comments on commit e37ac35

Please sign in to comment.