From 9178c78532d6ccbe75b4540f87e9456595936d62 Mon Sep 17 00:00:00 2001 From: Sean Smith Date: Sun, 29 Dec 2024 20:04:19 -0500 Subject: [PATCH] some string funcs --- .../rayexec_execution/src/arrays/array/exp.rs | 4 + .../src/arrays/buffer/mod.rs | 43 +++++++++ .../src/arrays/buffer/physical_type.rs | 28 ++++++ .../src/arrays/buffer/string_view.rs | 43 +++++++++ .../src/arrays/executor/scalar/uniform.rs | 10 +- .../src/functions/scalar/builtin/boolean.rs | 6 +- .../functions/scalar/builtin/string/concat.rs | 96 +++++++++++-------- .../scalar/builtin/string/contains.rs | 54 ++++++----- .../scalar/builtin/string/ends_with.rs | 58 ++++++----- .../functions/scalar/builtin/string/length.rs | 76 ++++++++------- .../functions/scalar/builtin/string/like.rs | 58 ++++++----- 11 files changed, 320 insertions(+), 156 deletions(-) diff --git a/crates/rayexec_execution/src/arrays/array/exp.rs b/crates/rayexec_execution/src/arrays/array/exp.rs index 51b50ea66..0838fd003 100644 --- a/crates/rayexec_execution/src/arrays/array/exp.rs +++ b/crates/rayexec_execution/src/arrays/array/exp.rs @@ -122,6 +122,10 @@ where &self.data } + pub fn data_mut(&mut self) -> &mut ArrayData { + &mut self.data + } + pub fn validity(&self) -> &Validity { &self.validity } diff --git a/crates/rayexec_execution/src/arrays/buffer/mod.rs b/crates/rayexec_execution/src/arrays/buffer/mod.rs index f3e73bea7..e30de93c1 100644 --- a/crates/rayexec_execution/src/arrays/buffer/mod.rs +++ b/crates/rayexec_execution/src/arrays/buffer/mod.rs @@ -5,10 +5,13 @@ pub mod string_view; mod raw; use buffer_manager::{BufferManager, NopBufferManager}; +use fmtutil::IntoDisplayableSlice; use physical_type::{PhysicalStorage, PhysicalType}; use raw::RawBufferParts; use rayexec_error::{RayexecError, Result}; use string_view::{ + BinaryViewAddressable, + BinaryViewAddressableMut, StringViewAddressable, StringViewAddressableMut, StringViewHeap, @@ -110,6 +113,34 @@ where Ok(StringViewAddressableMut { metadata, heap }) } + pub fn 
try_as_binary_view_addressable(&self) -> Result { + self.check_type_one_of(&[PhysicalType::Utf8, PhysicalType::Binary])?; + + let metadata = unsafe { self.primary.as_slice::() }; + let heap = match self.secondary.as_ref() { + SecondaryBuffer::StringViewHeap(heap) => heap, + _ => return Err(RayexecError::new("Missing string heap")), + }; + + Ok(BinaryViewAddressable { metadata, heap }) + } + + pub fn try_as_binary_view_addressable_mut(&mut self) -> Result { + // Note that unlike the non-mut version of this function, we only allow + // physical binary types here. For reads, treating strings as binary is + // completely fine, but allowing writing raw binary to a logical string + // array could lead to invalid utf8. + self.check_type(PhysicalType::Binary)?; + + let metadata = unsafe { self.primary.as_slice_mut::() }; + let heap = match self.secondary.as_mut() { + SecondaryBuffer::StringViewHeap(heap) => heap, + _ => return Err(RayexecError::new("Missing string heap")), + }; + + Ok(BinaryViewAddressableMut { metadata, heap }) + } + fn check_type(&self, want: PhysicalType) -> Result<()> { if want != self.physical_type { return Err(RayexecError::new("Physical types don't match") @@ -119,6 +150,18 @@ where Ok(()) } + + fn check_type_one_of(&self, oneof: &[PhysicalType]) -> Result<()> { + if !oneof.contains(&self.physical_type) { + return Err( + RayexecError::new("Physical type not one of requested types") + .with_field("have", self.physical_type) + .with_field("oneof", oneof.display_as_list().to_string()), + ); + } + + Ok(()) + } } impl Drop for ArrayBuffer { diff --git a/crates/rayexec_execution/src/arrays/buffer/physical_type.rs b/crates/rayexec_execution/src/arrays/buffer/physical_type.rs index 57aecab24..00a96090b 100644 --- a/crates/rayexec_execution/src/arrays/buffer/physical_type.rs +++ b/crates/rayexec_execution/src/arrays/buffer/physical_type.rs @@ -5,6 +5,8 @@ use rayexec_error::Result; use super::buffer_manager::BufferManager; use super::string_view::{ + 
BinaryViewAddressable, + BinaryViewAddressableMut, StringViewAddressable, StringViewAddressableMut, StringViewMetadataUnion, @@ -273,6 +275,32 @@ impl MutablePhysicalStorage for PhysicalUtf8 { } } +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub struct PhysicalBinary; + +impl PhysicalStorage for PhysicalBinary { + const PHYSICAL_TYPE: PhysicalType = PhysicalType::Binary; + + type PrimaryBufferType = StringViewMetadataUnion; + type StorageType = [u8]; + + type Addressable<'a> = BinaryViewAddressable<'a>; + + fn get_addressable(buffer: &ArrayBuffer) -> Result> { + buffer.try_as_binary_view_addressable() + } +} + +impl MutablePhysicalStorage for PhysicalBinary { + type AddressableMut<'a> = BinaryViewAddressableMut<'a>; + + fn get_addressable_mut( + buffer: &mut ArrayBuffer, + ) -> Result> { + buffer.try_as_binary_view_addressable_mut() + } +} + /// Dictionary arrays have the selection vector as the primary data buffer. #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub struct PhysicalDictionary; diff --git a/crates/rayexec_execution/src/arrays/buffer/string_view.rs b/crates/rayexec_execution/src/arrays/buffer/string_view.rs index 31968b618..2d32f57f4 100644 --- a/crates/rayexec_execution/src/arrays/buffer/string_view.rs +++ b/crates/rayexec_execution/src/arrays/buffer/string_view.rs @@ -48,6 +48,49 @@ impl<'a> AddressableMut for StringViewAddressableMut<'a> { } } +#[derive(Debug)] +pub struct BinaryViewAddressable<'a> { + pub(crate) metadata: &'a [StringViewMetadataUnion], + pub(crate) heap: &'a StringViewHeap, +} + +impl<'a> Addressable for BinaryViewAddressable<'a> { + type T = [u8]; + + fn len(&self) -> usize { + self.metadata.len() + } + + fn get(&self, idx: usize) -> Option<&Self::T> { + let m = self.metadata.get(idx)?; + self.heap.get(m) + } +} + +#[derive(Debug)] +pub struct BinaryViewAddressableMut<'a> { + pub(crate) metadata: &'a mut [StringViewMetadataUnion], + pub(crate) heap: &'a mut StringViewHeap, +} + +impl<'a> AddressableMut for 
BinaryViewAddressableMut<'a> { + type T = [u8]; + + fn len(&self) -> usize { + self.metadata.len() + } + + fn get_mut(&mut self, idx: usize) -> Option<&mut Self::T> { + let m = self.metadata.get_mut(idx)?; + self.heap.get_mut(m) + } + + fn put(&mut self, idx: usize, val: &Self::T) { + let new_m = self.heap.push_bytes(val); + self.metadata[idx] = new_m; + } +} + /// Metadata for small (<= 12 bytes) varlen data. #[derive(Debug, Clone, Copy, PartialEq, Eq)] #[repr(C)] diff --git a/crates/rayexec_execution/src/arrays/executor/scalar/uniform.rs b/crates/rayexec_execution/src/arrays/executor/scalar/uniform.rs index 466aafb3b..3b0362b6a 100644 --- a/crates/rayexec_execution/src/arrays/executor/scalar/uniform.rs +++ b/crates/rayexec_execution/src/arrays/executor/scalar/uniform.rs @@ -10,9 +10,9 @@ use crate::arrays::selection; use crate::arrays::storage::AddressableStorage; #[derive(Debug, Clone, Copy)] -pub struct UniformExecutor; +pub struct UniformExecutor2; -impl UniformExecutor { +impl UniformExecutor2 { pub fn execute<'a, S, B, Op>( arrays: &[&'a Array2], builder: ArrayBuilder, @@ -127,7 +127,7 @@ mod tests { let mut string_buffer = String::new(); - let got = UniformExecutor::execute::( + let got = UniformExecutor2::execute::( &[&first, &second, &third], builder, |inputs, buf| { @@ -162,7 +162,7 @@ mod tests { let mut string_buffer = String::new(); - let got = UniformExecutor::execute::( + let got = UniformExecutor2::execute::( &[&first, &second, &third], builder, |inputs, buf| { @@ -195,7 +195,7 @@ mod tests { let mut string_buffer = String::new(); - let got = UniformExecutor::execute::( + let got = UniformExecutor2::execute::( &[&first, &second, &third], builder, |inputs, buf| { diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/boolean.rs b/crates/rayexec_execution/src/functions/scalar/builtin/boolean.rs index af673e6fd..8b12bb776 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/boolean.rs +++ 
b/crates/rayexec_execution/src/functions/scalar/builtin/boolean.rs @@ -8,7 +8,7 @@ use crate::arrays::bitmap::Bitmap; use crate::arrays::datatype::{DataType, DataTypeId}; use crate::arrays::executor::builder::{ArrayBuilder, BooleanBuffer}; use crate::arrays::executor::physical_type::PhysicalBool_2; -use crate::arrays::executor::scalar::{BinaryExecutor2, TernaryExecutor, UniformExecutor}; +use crate::arrays::executor::scalar::{BinaryExecutor2, TernaryExecutor, UniformExecutor2}; use crate::arrays::storage::BooleanStorage; use crate::expr::Expression; use crate::functions::documentation::{Category, Documentation, Example}; @@ -111,7 +111,7 @@ impl ScalarFunctionImpl for AndImpl { } _ => { let len = inputs[0].logical_len(); - UniformExecutor::execute::( + UniformExecutor2::execute::( inputs, ArrayBuilder { datatype: DataType::Boolean, @@ -204,7 +204,7 @@ impl ScalarFunctionImpl for OrImpl { } _ => { let len = inputs[0].logical_len(); - UniformExecutor::execute::( + UniformExecutor2::execute::( inputs, ArrayBuilder { datatype: DataType::Boolean, diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/string/concat.rs b/crates/rayexec_execution/src/functions/scalar/builtin/string/concat.rs index 55be516e2..20dac7387 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/string/concat.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/string/concat.rs @@ -1,10 +1,13 @@ use rayexec_error::Result; -use crate::arrays::array::Array2; +use crate::arrays::array::exp::Array; +use crate::arrays::batch_exp::Batch; +use crate::arrays::buffer::physical_type::{AddressableMut, PhysicalUtf8}; use crate::arrays::datatype::{DataType, DataTypeId}; -use crate::arrays::executor::builder::{ArrayBuilder, GermanVarlenBuffer}; -use crate::arrays::executor::physical_type::PhysicalUtf8_2; -use crate::arrays::executor::scalar::{BinaryExecutor2, UniformExecutor}; +use crate::arrays::executor_exp::scalar::binary::BinaryExecutor; +use 
crate::arrays::executor_exp::scalar::unary::UnaryExecutor; +use crate::arrays::executor_exp::scalar::uniform::UniformExecutor; +use crate::arrays::executor_exp::OutBuffer; use crate::expr::Expression; use crate::functions::documentation::{Category, Documentation, Example}; use crate::functions::scalar::{PlannedScalarFunction, ScalarFunction, ScalarFunctionImpl}; @@ -68,55 +71,70 @@ impl ScalarFunction for Concat { pub struct StringConcatImpl; impl ScalarFunctionImpl for StringConcatImpl { - fn execute2(&self, inputs: &[&Array2]) -> Result { - match inputs.len() { + fn execute(&self, input: &Batch, output: &mut Array) -> Result<()> { + let sel = input.selection(); + + match input.arrays().len() { 0 => { - let mut array = Array2::from_iter([""]); - array.set_physical_validity(0, false); - Ok(array) + // TODO: Zero args should actually error during planning. + // Currently this just sets everything to an empty string. + let mut addressable = output + .data_mut() + .try_as_mut()? + .try_as_string_view_addressable_mut()?; + + for idx in 0..addressable.len() { + addressable.put(idx, ""); + } } - 1 => Ok(inputs[0].clone()), - 2 => { - let a = inputs[0]; - let b = inputs[1]; + 1 => { + let input = &input.arrays()[0]; - let mut string_buf = String::new(); + UnaryExecutor::execute::( + input, + sel, + OutBuffer::from_array(output)?, + |s, buf| buf.put(s), + )?; + } + 2 => { + let a = &input.arrays()[0]; + let b = &input.arrays()[1]; - // TODO: Compute data capacity. 
+ let mut str_buf = String::new(); - BinaryExecutor2::execute::( + BinaryExecutor::execute::( a, + sel, b, - ArrayBuilder { - datatype: DataType::Utf8, - buffer: GermanVarlenBuffer::with_len(a.logical_len()), - }, - |a, b, buf| { - string_buf.clear(); - string_buf.push_str(a); - string_buf.push_str(b); - buf.put(string_buf.as_str()); + sel, + OutBuffer::from_array(output)?, + |s1, s2, buf| { + str_buf.clear(); + str_buf.push_str(s1); + str_buf.push_str(s2); + buf.put(&str_buf); }, - ) + )?; } _ => { - let mut string_buf = String::new(); + let mut str_buf = String::new(); - UniformExecutor::execute::( - inputs, - ArrayBuilder { - datatype: DataType::Utf8, - buffer: GermanVarlenBuffer::with_len(inputs[0].logical_len()), - }, - |strings, buf| { - string_buf.clear(); - for s in strings { - string_buf.push_str(s); + UniformExecutor::execute::( + input.arrays(), + sel, + OutBuffer::from_array(output)?, + |ss, buf| { + str_buf.clear(); + for s in ss { + str_buf.push_str(s); } - buf.put(string_buf.as_str()); + buf.put(&str_buf); }, - ) + )?; } } + + Ok(()) } } diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/string/contains.rs b/crates/rayexec_execution/src/functions/scalar/builtin/string/contains.rs index 3ea1f64a9..2197a7ea0 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/string/contains.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/string/contains.rs @@ -1,10 +1,12 @@ use rayexec_error::Result; -use crate::arrays::array::Array2; +use crate::arrays::array::exp::Array; +use crate::arrays::batch_exp::Batch; +use crate::arrays::buffer::physical_type::{PhysicalBool, PhysicalUtf8}; use crate::arrays::datatype::{DataType, DataTypeId}; -use crate::arrays::executor::builder::{ArrayBuilder, BooleanBuffer}; -use crate::arrays::executor::physical_type::PhysicalUtf8_2; -use crate::arrays::executor::scalar::{BinaryExecutor2, UnaryExecutor2}; +use crate::arrays::executor_exp::scalar::binary::BinaryExecutor; +use 
crate::arrays::executor_exp::scalar::unary::UnaryExecutor; +use crate::arrays::executor_exp::OutBuffer; use crate::expr::Expression; use crate::functions::documentation::{Category, Documentation, Example}; use crate::functions::scalar::{PlannedScalarFunction, ScalarFunction, ScalarFunctionImpl}; @@ -82,15 +84,19 @@ pub struct StringContainsConstantImpl { } impl ScalarFunctionImpl for StringContainsConstantImpl { - fn execute2(&self, inputs: &[&Array2]) -> Result { - let builder = ArrayBuilder { - datatype: DataType::Boolean, - buffer: BooleanBuffer::with_len(inputs[0].logical_len()), - }; + fn execute(&self, input: &Batch, output: &mut Array) -> Result<()> { + let sel = input.selection(); + let haystack = &input.arrays()[0]; - UnaryExecutor2::execute::(inputs[0], builder, |s, buf| { - buf.put(&s.contains(&self.constant)) - }) + UnaryExecutor::execute::( + haystack, + sel, + OutBuffer::from_array(output)?, + |haystack, buf| { + let v = haystack.contains(&self.constant); + buf.put(&v); + }, + ) } } @@ -98,17 +104,21 @@ impl ScalarFunctionImpl for StringContainsConstantImpl { pub struct StringContainsImpl; impl ScalarFunctionImpl for StringContainsImpl { - fn execute2(&self, inputs: &[&Array2]) -> Result { - let builder = ArrayBuilder { - datatype: DataType::Boolean, - buffer: BooleanBuffer::with_len(inputs[0].logical_len()), - }; + fn execute(&self, input: &Batch, output: &mut Array) -> Result<()> { + let sel = input.selection(); + let haystack = &input.arrays()[0]; + let needle = &input.arrays()[1]; - BinaryExecutor2::execute::( - inputs[0], - inputs[1], - builder, - |s, c, buf| buf.put(&s.contains(c)), + BinaryExecutor::execute::( + haystack, + sel, + needle, + sel, + OutBuffer::from_array(output)?, + |haystack, needle, buf| { + let v = haystack.contains(needle); + buf.put(&v); + }, ) } } diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/string/ends_with.rs b/crates/rayexec_execution/src/functions/scalar/builtin/string/ends_with.rs index 
09a975667..102b594fe 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/string/ends_with.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/string/ends_with.rs @@ -1,10 +1,12 @@ use rayexec_error::Result; -use crate::arrays::array::Array2; +use crate::arrays::array::exp::Array; +use crate::arrays::batch_exp::Batch; +use crate::arrays::buffer::physical_type::{PhysicalBool, PhysicalUtf8}; use crate::arrays::datatype::{DataType, DataTypeId}; -use crate::arrays::executor::builder::{ArrayBuilder, BooleanBuffer}; -use crate::arrays::executor::physical_type::PhysicalUtf8_2; -use crate::arrays::executor::scalar::{BinaryExecutor2, UnaryExecutor2}; +use crate::arrays::executor_exp::scalar::binary::BinaryExecutor; +use crate::arrays::executor_exp::scalar::unary::UnaryExecutor; +use crate::arrays::executor_exp::OutBuffer; use crate::expr::Expression; use crate::functions::documentation::{Category, Documentation, Example}; use crate::functions::scalar::{PlannedScalarFunction, ScalarFunction, ScalarFunctionImpl}; @@ -86,15 +88,19 @@ pub struct EndsWithConstantImpl { } impl ScalarFunctionImpl for EndsWithConstantImpl { - fn execute2(&self, inputs: &[&Array2]) -> Result { - let builder = ArrayBuilder { - datatype: DataType::Boolean, - buffer: BooleanBuffer::with_len(inputs[0].logical_len()), - }; - - UnaryExecutor2::execute::(inputs[0], builder, |s, buf| { - buf.put(&s.ends_with(&self.constant)) - }) + fn execute(&self, input: &Batch, output: &mut Array) -> Result<()> { + let sel = input.selection(); + let input = &input.arrays()[0]; + + UnaryExecutor::execute::( + input, + sel, + OutBuffer::from_array(output)?, + |s, buf| { + let v = s.ends_with(&self.constant); + buf.put(&v); + }, + ) } } @@ -102,17 +108,21 @@ impl ScalarFunctionImpl for EndsWithConstantImpl { pub struct EndsWithImpl; impl ScalarFunctionImpl for EndsWithImpl { - fn execute2(&self, inputs: &[&Array2]) -> Result { - let builder = ArrayBuilder { - datatype: DataType::Boolean, - buffer: 
BooleanBuffer::with_len(inputs[0].logical_len()), - }; - - BinaryExecutor2::execute::( - inputs[0], - inputs[1], - builder, - |s, c, buf| buf.put(&s.ends_with(c)), + fn execute(&self, input: &Batch, output: &mut Array) -> Result<()> { + let sel = input.selection(); + let strings = &input.arrays()[0]; + let suffix = &input.arrays()[1]; + + BinaryExecutor::execute::( + strings, + sel, + suffix, + sel, + OutBuffer::from_array(output)?, + |s, suffix, buf| { + let v = s.ends_with(&suffix); + buf.put(&v); + }, ) } } diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/string/length.rs b/crates/rayexec_execution/src/functions/scalar/builtin/string/length.rs index 6b76bc9f0..d471afb60 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/string/length.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/string/length.rs @@ -1,10 +1,11 @@ use rayexec_error::Result; -use crate::arrays::array::Array2; +use crate::arrays::array::exp::Array; +use crate::arrays::batch_exp::Batch; +use crate::arrays::buffer::physical_type::{PhysicalBinary, PhysicalI64, PhysicalUtf8}; use crate::arrays::datatype::{DataType, DataTypeId}; -use crate::arrays::executor::builder::{ArrayBuilder, PrimitiveBuffer}; -use crate::arrays::executor::physical_type::{PhysicalBinary_2, PhysicalUtf8_2}; -use crate::arrays::executor::scalar::UnaryExecutor2; +use crate::arrays::executor_exp::scalar::unary::UnaryExecutor; +use crate::arrays::executor_exp::OutBuffer; use crate::expr::Expression; use crate::functions::documentation::{Category, Documentation, Example}; use crate::functions::scalar::{PlannedScalarFunction, ScalarFunction, ScalarFunctionImpl}; @@ -64,18 +65,19 @@ impl ScalarFunction for Length { pub struct StrLengthImpl; impl ScalarFunctionImpl for StrLengthImpl { - fn execute2(&self, inputs: &[&Array2]) -> Result { - let input = inputs[0]; - - let builder = ArrayBuilder { - datatype: DataType::Int64, - buffer: PrimitiveBuffer::with_len(input.logical_len()), - }; - - 
UnaryExecutor2::execute::(input, builder, |v, buf| { - let len = v.chars().count() as i64; - buf.put(&len) - }) + fn execute(&self, input: &Batch, output: &mut Array) -> Result<()> { + let sel = input.selection(); + let input = &input.arrays()[0]; + + UnaryExecutor::execute::( + input, + sel, + OutBuffer::from_array(output)?, + |s, buf| { + let len = s.chars().count() as i64; + buf.put(&len) + }, + ) } } @@ -145,18 +147,17 @@ impl ScalarFunction for ByteLength { pub struct ByteLengthImpl; impl ScalarFunctionImpl for ByteLengthImpl { - fn execute2(&self, inputs: &[&Array2]) -> Result { - let input = inputs[0]; - - let builder = ArrayBuilder { - datatype: DataType::Int64, - buffer: PrimitiveBuffer::with_len(input.logical_len()), - }; + fn execute(&self, input: &Batch, output: &mut Array) -> Result<()> { + let sel = input.selection(); + let input = &input.arrays()[0]; // Binary applicable to both str and [u8]. - UnaryExecutor2::execute::(input, builder, |v, buf| { - buf.put(&(v.len() as i64)) - }) + UnaryExecutor::execute::( + input, + sel, + OutBuffer::from_array(output)?, + |v, buf| buf.put(&(v.len() as i64)), + ) } } @@ -222,18 +223,19 @@ impl ScalarFunction for BitLength { pub struct BitLengthImpl; impl ScalarFunctionImpl for BitLengthImpl { - fn execute2(&self, inputs: &[&Array2]) -> Result { - let input = inputs[0]; - - let builder = ArrayBuilder { - datatype: DataType::Int64, - buffer: PrimitiveBuffer::with_len(input.logical_len()), - }; + fn execute(&self, input: &Batch, output: &mut Array) -> Result<()> { + let sel = input.selection(); + let input = &input.arrays()[0]; // Binary applicable to both str and [u8]. 
- UnaryExecutor2::execute::(input, builder, |v, buf| { - let bit_len = v.len() * 8; - buf.put(&(bit_len as i64)) - }) + UnaryExecutor::execute::( + input, + sel, + OutBuffer::from_array(output)?, + |v, buf| { + let bit_len = v.len() * 8; + buf.put(&(bit_len as i64)) + }, + ) } } diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/string/like.rs b/crates/rayexec_execution/src/functions/scalar/builtin/string/like.rs index 3d626f36b..2452725e0 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/string/like.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/string/like.rs @@ -1,11 +1,13 @@ use rayexec_error::{Result, ResultExt}; use regex::{escape, Regex}; -use crate::arrays::array::Array2; +use crate::arrays::array::exp::Array; +use crate::arrays::batch_exp::Batch; +use crate::arrays::buffer::physical_type::{PhysicalBool, PhysicalUtf8}; use crate::arrays::datatype::{DataType, DataTypeId}; -use crate::arrays::executor::builder::{ArrayBuilder, BooleanBuffer}; -use crate::arrays::executor::physical_type::PhysicalUtf8_2; -use crate::arrays::executor::scalar::{BinaryExecutor2, UnaryExecutor2}; +use crate::arrays::executor_exp::scalar::binary::BinaryExecutor; +use crate::arrays::executor_exp::scalar::unary::UnaryExecutor; +use crate::arrays::executor_exp::OutBuffer; use crate::expr::Expression; use crate::functions::documentation::{Category, Documentation, Example}; use crate::functions::scalar::{PlannedScalarFunction, ScalarFunction, ScalarFunctionImpl}; @@ -84,16 +86,19 @@ pub struct LikeConstImpl { } impl ScalarFunctionImpl for LikeConstImpl { - fn execute2(&self, inputs: &[&Array2]) -> Result { - let builder = ArrayBuilder { - datatype: DataType::Boolean, - buffer: BooleanBuffer::with_len(inputs[0].logical_len()), - }; - - UnaryExecutor2::execute::(inputs[0], builder, |s, buf| { - let b = self.constant.is_match(s); - buf.put(&b); - }) + fn execute(&self, input: &Batch, output: &mut Array) -> Result<()> { + let sel = 
input.selection(); + let input = &input.arrays()[0]; + + UnaryExecutor::execute::( + input, + sel, + OutBuffer::from_array(output)?, + |s, buf| { + let b = self.constant.is_match(s); + buf.put(&b); + }, + ) } } @@ -101,22 +106,23 @@ impl ScalarFunctionImpl for LikeConstImpl { pub struct LikeImpl; impl ScalarFunctionImpl for LikeImpl { - fn execute2(&self, inputs: &[&Array2]) -> Result { - let builder = ArrayBuilder { - datatype: DataType::Boolean, - buffer: BooleanBuffer::with_len(inputs[0].logical_len()), - }; + fn execute(&self, input: &Batch, output: &mut Array) -> Result<()> { + let sel = input.selection(); + let strings = &input.arrays()[0]; + let patterns = &input.arrays()[1]; let mut s_buf = String::new(); - BinaryExecutor2::execute::( - inputs[0], - inputs[1], - builder, - |a, b, buf| { - match like_pattern_to_regex(&mut s_buf, b, Some('\\')) { + BinaryExecutor::execute::( + strings, + sel, + patterns, + sel, + OutBuffer::from_array(output)?, + |s, pattern, buf| { + match like_pattern_to_regex(&mut s_buf, pattern, Some('\\')) { Ok(pat) => { - let b = pat.is_match(a); + let b = pat.is_match(s); buf.put(&b); } Err(_) => {