Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Provide additional common builder interfaces with ValuesBuilder, a subtrait of ArrayBuilder. #6927

Closed
wants to merge 5 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion arrow-array/src/array/byte_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ use std::sync::Arc;
///
/// ```
/// # use arrow_array::Array;
/// # use arrow_array::builder::GenericByteBuilder;
/// # use arrow_array::builder::{GenericByteBuilder, ValuesBuilder};
/// # use arrow_array::types::Utf8Type;
/// let mut builder = GenericByteBuilder::<Utf8Type>::new();
/// builder.append_value("hello");
Expand Down
4 changes: 2 additions & 2 deletions arrow-array/src/array/byte_view_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
// under the License.

use crate::array::print_long_array;
use crate::builder::{ArrayBuilder, GenericByteViewBuilder};
use crate::builder::{ArrayBuilder, GenericByteViewBuilder, ValuesBuilder};
use crate::iterator::ArrayIter;
use crate::types::bytes::ByteArrayNativeType;
use crate::types::{BinaryViewType, ByteViewType, StringViewType};
Expand Down Expand Up @@ -841,7 +841,7 @@ impl From<Vec<Option<String>>> for StringViewArray {

#[cfg(test)]
mod tests {
use crate::builder::{BinaryViewBuilder, StringViewBuilder};
use crate::builder::{BinaryViewBuilder, StringViewBuilder, ValuesBuilder};
use crate::{Array, BinaryViewArray, StringViewArray};
use arrow_buffer::{Buffer, ScalarBuffer};
use arrow_data::ByteView;
Expand Down
2 changes: 1 addition & 1 deletion arrow-array/src/array/run_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ use arrow_data::{ArrayData, ArrayDataBuilder};
use arrow_schema::{ArrowError, DataType, Field};

use crate::{
builder::StringRunBuilder,
builder::{StringRunBuilder, ValuesBuilder},
make_array,
run_iterator::RunArrayIter,
types::{Int16Type, Int32Type, Int64Type, RunEndIndexType},
Expand Down
2 changes: 1 addition & 1 deletion arrow-array/src/array/string_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,7 @@ pub type LargeStringArray = GenericStringArray<i64>;
#[cfg(test)]
mod tests {
use super::*;
use crate::builder::{ListBuilder, PrimitiveBuilder, StringBuilder};
use crate::builder::{ListBuilder, PrimitiveBuilder, StringBuilder, ValuesBuilder};
use crate::types::UInt8Type;
use crate::Array;
use arrow_buffer::Buffer;
Expand Down
28 changes: 16 additions & 12 deletions arrow-array/src/builder/generic_byte_run_builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ use crate::{
ArrayRef, ArrowPrimitiveType, RunArray,
};

use super::{ArrayBuilder, GenericByteBuilder, PrimitiveBuilder};
use super::{ArrayBuilder, GenericByteBuilder, PrimitiveBuilder, ValuesBuilder};

use arrow_buffer::ArrowNativeType;

Expand All @@ -33,7 +33,7 @@ use arrow_buffer::ArrowNativeType;
///
/// ```
///
/// # use arrow_array::builder::GenericByteRunBuilder;
/// # use arrow_array::builder::{GenericByteRunBuilder, ValuesBuilder};
/// # use arrow_array::{GenericByteArray, BinaryArray};
/// # use arrow_array::types::{BinaryType, Int16Type};
/// # use arrow_array::{Array, Int16Array};
Expand Down Expand Up @@ -155,21 +155,19 @@ where
}
}

impl<R, V> GenericByteRunBuilder<R, V>
where
R: RunEndIndexType,
V: ByteArrayType,
{
impl<R: RunEndIndexType, T: ByteArrayType> ValuesBuilder<T> for GenericByteRunBuilder<R, T> {
type Value = T::Native;

/// Appends an optional value to the logical array encoded by the RunArray.
///
/// A `Some` input is forwarded to `append_value`; a `None` input is
/// forwarded to `append_null`.
pub fn append_option(&mut self, input_value: Option<impl AsRef<V::Native>>) {
fn append_option(&mut self, input_value: Option<impl AsRef<Self::Value>>) {
match input_value {
Some(value) => self.append_value(value),
None => self.append_null(),
}
}

/// Appends value to the logical array encoded by the RunArray.
pub fn append_value(&mut self, input_value: impl AsRef<V::Native>) {
fn append_value(&mut self, input_value: impl AsRef<Self::Value>) {
let value: &[u8] = input_value.as_ref().as_ref();
if !self.has_current_value {
self.append_run_end();
Expand All @@ -184,15 +182,21 @@ where
}

/// Appends null to the logical array encoded by the RunArray.
///
/// When a non-null value is currently being tracked, `append_run_end` is
/// called first, the buffered `current_value` is cleared and the
/// `has_current_value` flag is reset. In every case the logical run-end
/// index is then advanced by one.
pub fn append_null(&mut self) {
fn append_null(&mut self) {
if self.has_current_value {
// Switching from a value to a null: finish the pending run before
// dropping the buffered bytes.
self.append_run_end();
self.current_value.clear();
self.has_current_value = false;
}
// Each appended null counts as one more logical element.
self.current_run_end_index += 1;
}
}

impl<R, V> GenericByteRunBuilder<R, V>
where
R: RunEndIndexType,
V: ByteArrayType,
{
/// Creates the RunArray and resets the builder.
/// Panics if RunArray cannot be built.
pub fn finish(&mut self) -> RunArray<R> {
Expand Down Expand Up @@ -305,7 +309,7 @@ where
/// // Create a run-end encoded array with run-end indexes data type as `i16`.
/// // The encoded values are Strings.
///
/// # use arrow_array::builder::StringRunBuilder;
/// # use arrow_array::builder::{StringRunBuilder, ValuesBuilder};
/// # use arrow_array::{Int16Array, StringArray};
/// # use arrow_array::types::Int16Type;
/// # use arrow_array::cast::AsArray;
Expand Down Expand Up @@ -341,7 +345,7 @@ pub type LargeStringRunBuilder<K> = GenericByteRunBuilder<K, LargeUtf8Type>;
/// // Create a run-end encoded array with run-end indexes data type as `i16`.
/// // The encoded data is binary values.
///
/// # use arrow_array::builder::BinaryRunBuilder;
/// # use arrow_array::builder::{BinaryRunBuilder, ValuesBuilder};
/// # use arrow_array::{BinaryArray, Int16Array};
/// # use arrow_array::cast::AsArray;
/// # use arrow_array::types::Int16Type;
Expand Down
100 changes: 53 additions & 47 deletions arrow-array/src/builder/generic_bytes_builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@ use arrow_data::ArrayDataBuilder;
use std::any::Any;
use std::sync::Arc;

use super::ValuesBuilder;

/// Builder for [`GenericByteArray`]
///
/// For building strings, see docs on [`GenericStringBuilder`].
Expand Down Expand Up @@ -86,49 +88,6 @@ impl<T: ByteArrayType> GenericByteBuilder<T> {
T::Offset::from_usize(self.value_builder.len()).expect("byte array offset overflow")
}

/// Pushes a single non-null value onto the end of the builder.
///
/// The [GenericStringBuilder] documentation has examples of assembling one
/// string value incrementally through several `write!` calls.
///
/// # Panics
///
/// Panics if [`Self::values_slice`] would grow beyond `T::Offset::MAX`
/// bytes as a result of this call.
///
/// This can occur, for example, with a [`StringArray`] or [`BinaryArray`]
/// whose values together exceed 2GB.
///
/// [`StringArray`]: crate::StringArray
/// [`BinaryArray`]: crate::BinaryArray
#[inline]
pub fn append_value(&mut self, value: impl AsRef<T::Native>) {
    let bytes: &[u8] = value.as_ref().as_ref();
    self.value_builder.append_slice(bytes);
    self.null_buffer_builder.append(true);
    // Read the offset only after the bytes are written, so it marks the
    // end of the value that was just appended.
    let end_offset = self.next_offset();
    self.offsets_builder.append(end_offset);
}

/// Appends either a value or a null, depending on the `Option` supplied.
///
/// - `Some(v)` appends `v` as a value.
/// - `None` appends a null.
///
/// See [`Self::append_value`] for the conditions under which this panics.
#[inline]
pub fn append_option(&mut self, value: Option<impl AsRef<T::Native>>) {
    if let Some(present) = value {
        self.append_value(present);
    } else {
        self.append_null();
    }
}

/// Appends one null entry to the builder.
#[inline]
pub fn append_null(&mut self) {
    self.null_buffer_builder.append(false);
    // No bytes are written for a null, so the recorded offset is whatever
    // `next_offset` currently reports.
    let current_offset = self.next_offset();
    self.offsets_builder.append(current_offset);
}

/// Builds the [`GenericByteArray`] and reset this builder.
pub fn finish(&mut self) -> GenericByteArray<T> {
let array_type = T::DATA_TYPE;
Expand Down Expand Up @@ -228,6 +187,53 @@ impl<T: ByteArrayType> ArrayBuilder for GenericByteBuilder<T> {
}
}

impl<T: ByteArrayType> ValuesBuilder<T> for GenericByteBuilder<T> {
    type Value = T::Native;

    /// Pushes a single non-null value onto the end of the builder.
    ///
    /// The [GenericStringBuilder] documentation has examples of assembling
    /// one string value incrementally through several `write!` calls.
    ///
    /// # Panics
    ///
    /// Panics if [`Self::values_slice`] would grow beyond `T::Offset::MAX`
    /// bytes as a result of this call.
    ///
    /// This can occur, for example, with a [`StringArray`] or
    /// [`BinaryArray`] whose values together exceed 2GB.
    ///
    /// [`StringArray`]: crate::StringArray
    /// [`BinaryArray`]: crate::BinaryArray
    #[inline]
    fn append_value(&mut self, value: impl AsRef<T::Native>) {
        let bytes: &[u8] = value.as_ref().as_ref();
        self.value_builder.append_slice(bytes);
        self.null_buffer_builder.append(true);
        // Read the offset only after the bytes are written, so it marks the
        // end of the value that was just appended.
        let end_offset = self.next_offset();
        self.offsets_builder.append(end_offset);
    }

    /// Appends either a value or a null, depending on the `Option` supplied.
    ///
    /// - `Some(v)` appends `v` as a value.
    /// - `None` appends a null.
    ///
    /// See [`Self::append_value`] for the conditions under which this panics.
    #[inline]
    fn append_option(&mut self, value: Option<impl AsRef<T::Native>>) {
        if let Some(present) = value {
            self.append_value(present);
        } else {
            self.append_null();
        }
    }

    /// Appends one null entry to the builder.
    #[inline]
    fn append_null(&mut self) {
        self.null_buffer_builder.append(false);
        // No bytes are written for a null, so the recorded offset is
        // whatever `next_offset` currently reports.
        let current_offset = self.next_offset();
        self.offsets_builder.append(current_offset);
    }
}

impl<T: ByteArrayType, V: AsRef<T::Native>> Extend<Option<V>> for GenericByteBuilder<T> {
#[inline]
fn extend<I: IntoIterator<Item = Option<V>>>(&mut self, iter: I) {
Expand All @@ -249,7 +255,7 @@ impl<T: ByteArrayType, V: AsRef<T::Native>> Extend<Option<V>> for GenericByteBui
///
/// # Example writing strings with `append_value`
/// ```
/// # use arrow_array::builder::GenericStringBuilder;
/// # use arrow_array::builder::{GenericStringBuilder, ValuesBuilder};
/// let mut builder = GenericStringBuilder::<i32>::new();
///
/// // Write one string value
Expand All @@ -267,7 +273,7 @@ impl<T: ByteArrayType, V: AsRef<T::Native>> Extend<Option<V>> for GenericByteBui
///
/// ```
/// # use std::fmt::Write;
/// # use arrow_array::builder::GenericStringBuilder;
/// # use arrow_array::builder::{GenericStringBuilder, ValuesBuilder};
/// let mut builder = GenericStringBuilder::<i32>::new();
///
/// // Write data in multiple `write!` calls
Expand Down Expand Up @@ -302,7 +308,7 @@ impl<O: OffsetSizeTrait> std::fmt::Write for GenericStringBuilder<O> {
///
/// # Example
/// ```
/// # use arrow_array::builder::GenericBinaryBuilder;
/// # use arrow_array::builder::{GenericBinaryBuilder, ValuesBuilder};
/// let mut builder = GenericBinaryBuilder::<i32>::new();
///
/// // Write data
Expand All @@ -321,7 +327,7 @@ impl<O: OffsetSizeTrait> std::fmt::Write for GenericStringBuilder<O> {
///
/// ```
/// # use std::io::Write;
/// # use arrow_array::builder::GenericBinaryBuilder;
/// # use arrow_array::builder::{GenericBinaryBuilder, ValuesBuilder};
/// let mut builder = GenericBinaryBuilder::<i32>::new();
///
/// // Write data in multiple `write_bytes` calls
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
// specific language governing permissions and limitations
// under the License.

use crate::builder::{ArrayBuilder, GenericByteBuilder, PrimitiveBuilder};
use crate::builder::{ArrayBuilder, GenericByteBuilder, PrimitiveBuilder, ValuesBuilder};
use crate::types::{ArrowDictionaryKeyType, ByteArrayType, GenericBinaryType, GenericStringType};
use crate::{Array, ArrayRef, DictionaryArray, GenericByteArray, TypedDictionaryArray};
use arrow_buffer::ArrowNativeType;
Expand Down
Loading
Loading