Skip to content

Commit

Permalink
fix(datafusion-functions-nested): array_distinct now works with null…
Browse files Browse the repository at this point in the history
… rows
  • Loading branch information
rluvaton committed Jan 1, 2025
1 parent 0ac0dee commit 2fc52cc
Showing 1 changed file with 11 additions and 3 deletions.
14 changes: 11 additions & 3 deletions datafusion/functions-nested/src/set_ops.rs
Original file line number Diff line number Diff line change
Expand Up @@ -516,11 +516,18 @@ fn general_array_distinct<OffsetSize: OffsetSizeTrait>(
let mut new_arrays = Vec::with_capacity(array.len());
let converter = RowConverter::new(vec![SortField::new(dt)])?;
// distinct for each list in ListArray
for arr in array.iter().flatten() {
for arr in array.iter() {
let last_offset: OffsetSize = offsets.last().copied().unwrap();
if arr.is_none() {
// A null list contributes no values, so repeat the previous offset (zero-length entry)
offsets.push(last_offset);
continue;
}

let arr = arr.unwrap();
let values = converter.convert_columns(&[arr])?;
// sort elements in list and remove duplicates
let rows = values.iter().sorted().dedup().collect::<Vec<_>>();
let last_offset: OffsetSize = offsets.last().copied().unwrap();
offsets.push(last_offset + OffsetSize::usize_as(rows.len()));
let arrays = converter.convert_rows(rows)?;
let array = match arrays.first() {
Expand All @@ -538,6 +545,7 @@ fn general_array_distinct<OffsetSize: OffsetSizeTrait>(
Arc::clone(field),
offsets,
values,
None,
// Carry over the input list's null buffer so null rows remain null in the output
array.nulls().cloned(),
)?))
}

0 comments on commit 2fc52cc

Please sign in to comment.