From fbb36b60887211157cddade5b72c640ead2f6da0 Mon Sep 17 00:00:00 2001 From: Andrew Duffy Date: Wed, 31 Jul 2024 20:36:52 -0400 Subject: [PATCH] improve speed of view take kernel --- arrow-select/src/take.rs | 17 ++++++++++++----- arrow/benches/take_kernels.rs | 34 ++++++++++++++++++++++++++++++++++ arrow/src/util/bench_util.rs | 32 ++++++++++++++++++++++++++++++++ 3 files changed, 78 insertions(+), 5 deletions(-) diff --git a/arrow-select/src/take.rs b/arrow-select/src/take.rs index d6892eb0a9e4..e3132535ce54 100644 --- a/arrow-select/src/take.rs +++ b/arrow-select/src/take.rs @@ -485,13 +485,20 @@ fn take_byte_view( array: &GenericByteViewArray, indices: &PrimitiveArray, ) -> Result, ArrowError> { + let data_len = indices.len(); + let new_views = take_native(array.views(), indices); let new_nulls = take_nulls(array.nulls(), indices); - Ok(GenericByteViewArray::new( - new_views, - array.data_buffers().to_vec(), - new_nulls, - )) + + let array_data = ArrayData::builder(T::DATA_TYPE) + .len(data_len) + .add_buffer(new_views.into_inner()) + .add_buffers(array.data_buffers().to_vec()) + .nulls(new_nulls); + + let array_data = unsafe { array_data.build_unchecked() }; + + Ok(GenericByteViewArray::from(array_data)) } /// `take` implementation for list arrays diff --git a/arrow/benches/take_kernels.rs b/arrow/benches/take_kernels.rs index 9c3f1eb40909..6e6f7eae811b 100644 --- a/arrow/benches/take_kernels.rs +++ b/arrow/benches/take_kernels.rs @@ -149,6 +149,40 @@ fn add_benchmark(c: &mut Criterion) { b.iter(|| bench_take(&values, &indices)) }); + let values = create_string_view_array(512, 0.0); + let indices = create_random_index(512, 0.0); + c.bench_function("take stringview 512", |b| b.iter(|| bench_take(&values, &indices))); + + let values = create_string_view_array(1024, 0.0); + let indices = create_random_index(1024, 0.0); + c.bench_function("take stringview 1024", |b| { + b.iter(|| bench_take(&values, &indices)) + }); + + let values = create_string_view_array(512, 
0.0); + let indices = create_random_index(512, 0.5); + c.bench_function("take stringview null indices 512", |b| { + b.iter(|| bench_take(&values, &indices)) + }); + + let values = create_string_view_array(1024, 0.0); + let indices = create_random_index(1024, 0.5); + c.bench_function("take stringview null indices 1024", |b| { + b.iter(|| bench_take(&values, &indices)) + }); + + let values = create_string_view_array(1024, 0.5); + let indices = create_random_index(1024, 0.0); + c.bench_function("take stringview null values 1024", |b| { + b.iter(|| bench_take(&values, &indices)) + }); + + let values = create_string_view_array(1024, 0.5); + let indices = create_random_index(1024, 0.5); + c.bench_function("take stringview null values null indices 1024", |b| { + b.iter(|| bench_take(&values, &indices)) + }); + let values = create_primitive_run_array::(1024, 512); let indices = create_random_index(1024, 0.0); c.bench_function( diff --git a/arrow/src/util/bench_util.rs b/arrow/src/util/bench_util.rs index ac7f86d561d5..fceb5a5fbcf0 100644 --- a/arrow/src/util/bench_util.rs +++ b/arrow/src/util/bench_util.rs @@ -160,6 +160,38 @@ pub fn create_string_array_with_len( .collect() } + +/// Creates a random (but fixed-seeded) string view array of a given size and null density. +/// +/// See `create_string_array` above for more details. 
+pub fn create_string_view_array( + size: usize, + null_density: f32, +) -> StringViewArray { + create_string_view_array_with_max_len(size, null_density, 400) +} + +/// Creates a random (but fixed-seeded) string view array of a given size and null density, where each string has a random length in `0..max_str_len` +fn create_string_view_array_with_max_len( + size: usize, + null_density: f32, + max_str_len: usize, +) -> StringViewArray { + let rng = &mut seedable_rng(); + (0..size) + .map(|_| { + if rng.gen::() < null_density { + None + } else { + let str_len = rng.gen_range(0..max_str_len); + let value = rng.sample_iter(&Alphanumeric).take(str_len).collect(); + let value = String::from_utf8(value).unwrap(); + Some(value) + } + }) + .collect() +} + /// Creates a random (but fixed-seeded) array of a given size, null density and length pub fn create_string_view_array_with_len( size: usize,