Skip to content

Commit

Permalink
fix(datafusion-functions-nested): array_distinct now works with null…
Browse files Browse the repository at this point in the history
… rows (#13966)

* added failing test

* fix(datafusion-functions-nested): `array_distinct` now works with null rows

* Update datafusion/functions-nested/src/set_ops.rs

Co-authored-by: Andrew Lamb <[email protected]>

* Update set_ops.rs

---------

Co-authored-by: Andrew Lamb <[email protected]>
  • Loading branch information
rluvaton and alamb authored Jan 2, 2025
1 parent 38ccb00 commit 04f56bd
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 3 deletions.
12 changes: 9 additions & 3 deletions datafusion/functions-nested/src/set_ops.rs
Original file line number Diff line number Diff line change
Expand Up @@ -516,11 +516,16 @@ fn general_array_distinct<OffsetSize: OffsetSizeTrait>(
let mut new_arrays = Vec::with_capacity(array.len());
let converter = RowConverter::new(vec![SortField::new(dt)])?;
// distinct for each list in ListArray
for arr in array.iter().flatten() {
for arr in array.iter() {
let last_offset: OffsetSize = offsets.last().copied().unwrap();
let Some(arr) = arr else {
// Add same offset for null
offsets.push(last_offset);
continue;
};
let values = converter.convert_columns(&[arr])?;
// sort elements in list and remove duplicates
let rows = values.iter().sorted().dedup().collect::<Vec<_>>();
let last_offset: OffsetSize = offsets.last().copied().unwrap();
offsets.push(last_offset + OffsetSize::usize_as(rows.len()));
let arrays = converter.convert_rows(rows)?;
let array = match arrays.first() {
Expand All @@ -538,6 +543,7 @@ fn general_array_distinct<OffsetSize: OffsetSizeTrait>(
Arc::clone(field),
offsets,
values,
None,
// Keep the list nulls
array.nulls().cloned(),
)?))
}
7 changes: 7 additions & 0 deletions datafusion/sqllogictest/test_files/array.slt
Original file line number Diff line number Diff line change
Expand Up @@ -5674,6 +5674,13 @@ select array_distinct([sum(a)]) from t1 where a > 100 group by b;
statement ok
drop table t1;

query ?
select array_distinct(a) from values ([1, 2, 3]), (null), ([1, 3, 1]) as X(a);
----
[1, 2, 3]
NULL
[1, 3]

query ?
select array_distinct([]);
----
Expand Down

0 comments on commit 04f56bd

Please sign in to comment.