Skip to content

Commit

Permalink
Add ArrowBytesViewMap and ArrowBytesViewSet (#11515)
Browse files: browse the repository at this point in the history
* Update `string-view` branch to arrow-rs main (#10966)

* Pin to arrow main

* Fix clippy with latest arrow

* Uncomment test that needs new arrow-rs to work

* Update datafusion-cli Cargo.lock

* Update Cargo.lock

* taplo

* merge

* update cast

* consistent dep

* fix ci

* add more tests

* make doc happy

* update new implementation

* fix bug

* avoid unused dep

* update dep

* update

* fix cargo check

* update doc

* pick up the comments change again

---------

Co-authored-by: Andrew Lamb <[email protected]>
  • Loading branch information
XiangpengHao and alamb authored Jul 19, 2024
1 parent 8e0ca1a commit db65772
Show file tree
Hide file tree
Showing 3 changed files with 691 additions and 7 deletions.
12 changes: 5 additions & 7 deletions datafusion/common/src/hash_utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,9 @@ use arrow_buffer::IntervalDayTime;
use arrow_buffer::IntervalMonthDayNano;

use crate::cast::{
as_boolean_array, as_fixed_size_list_array, as_generic_binary_array,
as_large_list_array, as_list_array, as_primitive_array, as_string_array,
as_struct_array,
as_binary_view_array, as_boolean_array, as_fixed_size_list_array,
as_generic_binary_array, as_large_list_array, as_list_array, as_primitive_array,
as_string_array, as_string_view_array, as_struct_array,
};
use crate::error::{Result, _internal_err};

Expand Down Expand Up @@ -360,8 +360,6 @@ pub fn create_hashes<'a>(
random_state: &RandomState,
hashes_buffer: &'a mut Vec<u64>,
) -> Result<&'a mut Vec<u64>> {
use crate::cast::{as_binary_view_array, as_string_view_array};

for (i, col) in arrays.iter().enumerate() {
let array = col.as_ref();
// combine hashes with `combine_hashes` for all columns besides the first
Expand All @@ -371,11 +369,11 @@ pub fn create_hashes<'a>(
DataType::Null => hash_null(random_state, hashes_buffer, rehash),
DataType::Boolean => hash_array(as_boolean_array(array)?, random_state, hashes_buffer, rehash),
DataType::Utf8 => hash_array(as_string_array(array)?, random_state, hashes_buffer, rehash),
DataType::LargeUtf8 => hash_array(as_largestring_array(array), random_state, hashes_buffer, rehash),
DataType::Utf8View => hash_array(as_string_view_array(array)?, random_state, hashes_buffer, rehash),
DataType::LargeUtf8 => hash_array(as_largestring_array(array), random_state, hashes_buffer, rehash),
DataType::Binary => hash_array(as_generic_binary_array::<i32>(array)?, random_state, hashes_buffer, rehash),
DataType::LargeBinary => hash_array(as_generic_binary_array::<i64>(array)?, random_state, hashes_buffer, rehash),
DataType::BinaryView => hash_array(as_binary_view_array(array)?, random_state, hashes_buffer, rehash),
DataType::LargeBinary => hash_array(as_generic_binary_array::<i64>(array)?, random_state, hashes_buffer, rehash),
DataType::FixedSizeBinary(_) => {
let array: &FixedSizeBinaryArray = array.as_any().downcast_ref().unwrap();
hash_array(array, random_state, hashes_buffer, rehash)
Expand Down
Loading

0 comments on commit db65772

Please sign in to comment.