Skip to content

Commit

Permalink
Remove Loggable{Batch}::arrow_field (#7257)
Browse files Browse the repository at this point in the history
It doesn't make any sense for a `ComponentBatch` to have any say in what
the final `ArrowField` should look like.

An `ArrowField` is a `Chunk`/`RecordBatch`/`Schema`-level concern that
only makes sense during IO/transport/FFI/storage/etc, and which requires
external context that a single `ComponentBatch` on its own has no idea
of.

---

Part of a lot of cleanup I want to do while we head towards:
* #7245
* #3741
  • Loading branch information
teh-cmc authored Aug 23, 2024
1 parent 02946bd commit e9de566
Show file tree
Hide file tree
Showing 4 changed files with 12 additions and 119 deletions.
42 changes: 4 additions & 38 deletions crates/store/re_types_core/src/archetype.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
use std::sync::Arc;

use crate::{
ComponentBatch, ComponentName, DeserializationResult, MaybeOwnedComponentBatch,
SerializationResult, _Backtrace,
Expand Down Expand Up @@ -220,26 +218,10 @@ impl<A: Archetype> crate::LoggableBatch for GenericIndicatorComponent<A> {
1
}

#[inline]
fn arrow_field(&self) -> arrow2::datatypes::Field {
let name = self.name().to_string();
arrow2::datatypes::Field::new(
name.clone(),
arrow2::datatypes::DataType::Extension(
name,
Arc::new(arrow2::datatypes::DataType::Null),
None,
),
false,
)
}

#[inline]
fn to_arrow(&self) -> SerializationResult<Box<dyn arrow2::array::Array>> {
Ok(
arrow2::array::NullArray::new(arrow2::datatypes::DataType::Null, self.num_instances())
.boxed(),
)
let datatype = arrow2::datatypes::DataType::Null;
Ok(arrow2::array::NullArray::new(datatype, self.num_instances()).boxed())
}
}

Expand Down Expand Up @@ -278,26 +260,10 @@ impl crate::LoggableBatch for NamedIndicatorComponent {
1
}

#[inline]
fn arrow_field(&self) -> arrow2::datatypes::Field {
let name = self.name().to_string();
arrow2::datatypes::Field::new(
name.clone(),
arrow2::datatypes::DataType::Extension(
name,
Arc::new(arrow2::datatypes::DataType::Null),
None,
),
false,
)
}

#[inline]
fn to_arrow(&self) -> SerializationResult<Box<dyn arrow2::array::Array>> {
Ok(
arrow2::array::NullArray::new(arrow2::datatypes::DataType::Null, self.num_instances())
.boxed(),
)
let datatype = arrow2::datatypes::DataType::Null;
Ok(arrow2::array::NullArray::new(datatype, self.num_instances()).boxed())
}
}

Expand Down
9 changes: 8 additions & 1 deletion crates/store/re_types_core/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,14 @@ pub trait AsComponents {
comp_batch
.as_ref()
.to_arrow()
.map(|array| (comp_batch.as_ref().arrow_field(), array))
.map(|array| {
let field = arrow2::datatypes::Field::new(
comp_batch.name().to_string(),
array.data_type().clone(),
false,
);
(field, array)
})
.with_context(comp_batch.as_ref().name())
})
.collect()
Expand Down
22 changes: 0 additions & 22 deletions crates/store/re_types_core/src/loggable.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@ pub trait Loggable: 'static + Send + Sync + Clone + Sized + SizeBytes {

/// Given an iterator of options of owned or reference values to the current
/// [`Loggable`], serializes them into an Arrow array.
/// The Arrow array's datatype will match [`Loggable::arrow_field`].
///
/// When using Rerun's builtin components & datatypes, this can only fail if the data
/// exceeds the maximum number of entries in an Arrow array (2^31 for standard arrays,
Expand All @@ -57,25 +56,10 @@ pub trait Loggable: 'static + Send + Sync + Clone + Sized + SizeBytes {
)
}

/// The underlying [`arrow2::datatypes::Field`], including datatype extensions.
///
/// The default implementation will simply wrap [`Self::extended_arrow_datatype`] in a
/// [`arrow2::datatypes::Field`], which is what you want in most cases (e.g. because you want
/// to declare the field as nullable).
#[inline]
fn arrow_field() -> arrow2::datatypes::Field {
arrow2::datatypes::Field::new(
Self::name().to_string(),
Self::extended_arrow_datatype(),
false,
)
}

// --- Optional serialization methods ---

/// Given an iterator of owned or reference values to the current [`Loggable`], serializes
/// them into an Arrow array.
/// The Arrow array's datatype will match [`Loggable::arrow_field`].
///
/// When using Rerun's builtin components & datatypes, this can only fail if the data
/// exceeds the maximum number of entries in an Arrow array (2^31 for standard arrays,
Expand All @@ -94,9 +78,6 @@ pub trait Loggable: 'static + Send + Sync + Clone + Sized + SizeBytes {
// --- Optional deserialization methods ---

/// Given an Arrow array, deserializes it into a collection of [`Loggable`]s.
///
/// This will _never_ fail if the Arrow array's datatype matches the one returned by
/// [`Loggable::arrow_field`].
#[inline]
fn from_arrow(data: &dyn ::arrow2::array::Array) -> DeserializationResult<Vec<Self>> {
re_tracing::profile_function!();
Expand All @@ -112,9 +93,6 @@ pub trait Loggable: 'static + Send + Sync + Clone + Sized + SizeBytes {
}

/// Given an Arrow array, deserializes it into a collection of optional [`Loggable`]s.
///
/// This will _never_ fail if the Arrow array's datatype matches the one returned by
/// [`Loggable::arrow_field`].
fn from_arrow_opt(
data: &dyn ::arrow2::array::Array,
) -> DeserializationResult<Vec<Option<Self>>> {
Expand Down
58 changes: 0 additions & 58 deletions crates/store/re_types_core/src/loggable_batch.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,6 @@ pub trait LoggableBatch {
/// The number of component instances stored into this batch.
fn num_instances(&self) -> usize;

/// The underlying [`arrow2::datatypes::Field`], including datatype extensions.
fn arrow_field(&self) -> arrow2::datatypes::Field;

/// Serializes the batch into an Arrow array.
fn to_arrow(&self) -> SerializationResult<Box<dyn ::arrow2::array::Array>>;
}
Expand Down Expand Up @@ -98,11 +95,6 @@ impl<'a> LoggableBatch for MaybeOwnedComponentBatch<'a> {
self.as_ref().num_instances()
}

#[inline]
fn arrow_field(&self) -> arrow2::datatypes::Field {
self.as_ref().arrow_field()
}

#[inline]
fn to_arrow(&self) -> SerializationResult<Box<dyn ::arrow2::array::Array>> {
self.as_ref().to_arrow()
Expand All @@ -126,11 +118,6 @@ impl<L: Clone + Loggable> LoggableBatch for L {
1
}

#[inline]
fn arrow_field(&self) -> arrow2::datatypes::Field {
L::arrow_field()
}

#[inline]
fn to_arrow(&self) -> SerializationResult<Box<dyn ::arrow2::array::Array>> {
L::to_arrow([std::borrow::Cow::Borrowed(self)])
Expand All @@ -154,11 +141,6 @@ impl<L: Clone + Loggable> LoggableBatch for Option<L> {
self.is_some() as usize
}

#[inline]
fn arrow_field(&self) -> arrow2::datatypes::Field {
L::arrow_field()
}

#[inline]
fn to_arrow(&self) -> SerializationResult<Box<dyn ::arrow2::array::Array>> {
L::to_arrow(self.iter().map(|v| std::borrow::Cow::Borrowed(v)))
Expand All @@ -182,11 +164,6 @@ impl<L: Clone + Loggable> LoggableBatch for Vec<L> {
self.len()
}

#[inline]
fn arrow_field(&self) -> arrow2::datatypes::Field {
L::arrow_field()
}

#[inline]
fn to_arrow(&self) -> SerializationResult<Box<dyn ::arrow2::array::Array>> {
L::to_arrow(self.iter().map(|v| std::borrow::Cow::Borrowed(v)))
Expand All @@ -210,11 +187,6 @@ impl<L: Loggable> LoggableBatch for Vec<Option<L>> {
self.len()
}

#[inline]
fn arrow_field(&self) -> arrow2::datatypes::Field {
L::arrow_field()
}

#[inline]
fn to_arrow(&self) -> SerializationResult<Box<dyn ::arrow2::array::Array>> {
L::to_arrow_opt(
Expand All @@ -241,11 +213,6 @@ impl<L: Loggable, const N: usize> LoggableBatch for [L; N] {
N
}

#[inline]
fn arrow_field(&self) -> arrow2::datatypes::Field {
L::arrow_field()
}

#[inline]
fn to_arrow(&self) -> SerializationResult<Box<dyn ::arrow2::array::Array>> {
L::to_arrow(self.iter().map(|v| std::borrow::Cow::Borrowed(v)))
Expand All @@ -269,11 +236,6 @@ impl<L: Loggable, const N: usize> LoggableBatch for [Option<L>; N] {
N
}

#[inline]
fn arrow_field(&self) -> arrow2::datatypes::Field {
L::arrow_field()
}

#[inline]
fn to_arrow(&self) -> SerializationResult<Box<dyn ::arrow2::array::Array>> {
L::to_arrow_opt(
Expand All @@ -300,11 +262,6 @@ impl<'a, L: Loggable> LoggableBatch for &'a [L] {
self.len()
}

#[inline]
fn arrow_field(&self) -> arrow2::datatypes::Field {
L::arrow_field()
}

#[inline]
fn to_arrow(&self) -> SerializationResult<Box<dyn ::arrow2::array::Array>> {
L::to_arrow(self.iter().map(|v| std::borrow::Cow::Borrowed(v)))
Expand All @@ -328,11 +285,6 @@ impl<'a, L: Loggable> LoggableBatch for &'a [Option<L>] {
self.len()
}

#[inline]
fn arrow_field(&self) -> arrow2::datatypes::Field {
L::arrow_field()
}

#[inline]
fn to_arrow(&self) -> SerializationResult<Box<dyn ::arrow2::array::Array>> {
L::to_arrow_opt(
Expand All @@ -359,11 +311,6 @@ impl<'a, L: Loggable, const N: usize> LoggableBatch for &'a [L; N] {
N
}

#[inline]
fn arrow_field(&self) -> arrow2::datatypes::Field {
L::arrow_field()
}

#[inline]
fn to_arrow(&self) -> SerializationResult<Box<dyn ::arrow2::array::Array>> {
L::to_arrow(self.iter().map(|v| std::borrow::Cow::Borrowed(v)))
Expand All @@ -387,11 +334,6 @@ impl<'a, L: Loggable, const N: usize> LoggableBatch for &'a [Option<L>; N] {
N
}

#[inline]
fn arrow_field(&self) -> arrow2::datatypes::Field {
L::arrow_field()
}

#[inline]
fn to_arrow(&self) -> SerializationResult<Box<dyn ::arrow2::array::Array>> {
L::to_arrow_opt(
Expand Down

0 comments on commit e9de566

Please sign in to comment.