diff --git a/Cargo.lock b/Cargo.lock index e19764bb31a5..1739cbcc7bf1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4237,7 +4237,6 @@ name = "re_format" version = "0.10.0-alpha.7+dev" dependencies = [ "arrow2", - "arrow2_convert", "comfy-table", "re_tuid", ] @@ -4683,12 +4682,13 @@ name = "re_tuid" version = "0.10.0-alpha.7+dev" dependencies = [ "arrow2", - "arrow2_convert", "criterion", "document-features", "getrandom", "once_cell", + "re_types", "serde", + "thiserror", "web-time", ] @@ -4699,7 +4699,6 @@ dependencies = [ "anyhow", "array-init", "arrow2", - "arrow2_convert", "backtrace", "bytemuck", "document-features", diff --git a/crates/re_arrow_store/src/store_arrow.rs b/crates/re_arrow_store/src/store_arrow.rs index 8f961279d338..08c3fb774f79 100644 --- a/crates/re_arrow_store/src/store_arrow.rs +++ b/crates/re_arrow_store/src/store_arrow.rs @@ -4,7 +4,7 @@ use arrow2::{array::Array, chunk::Chunk, datatypes::Schema}; use nohash_hasher::IntMap; use re_log_types::{ DataCellColumn, DataTable, DataTableResult, RowId, Timeline, COLUMN_INSERT_ID, - COLUMN_NUM_INSTANCES, COLUMN_ROW_ID, + COLUMN_NUM_INSTANCES, }; use re_types::ComponentName; @@ -149,8 +149,7 @@ fn serialize_control_columns( columns.push(insert_id_column); } - let (row_id_field, row_id_column) = - DataTable::serialize_control_column(COLUMN_ROW_ID, col_row_id)?; + let (row_id_field, row_id_column) = DataTable::serialize_control_column(col_row_id)?; schema.fields.push(row_id_field); columns.push(row_id_column); diff --git a/crates/re_arrow_store/src/store_sanity.rs b/crates/re_arrow_store/src/store_sanity.rs index 52b10c3461ec..80e7fdb3c997 100644 --- a/crates/re_arrow_store/src/store_sanity.rs +++ b/crates/re_arrow_store/src/store_sanity.rs @@ -1,8 +1,7 @@ use re_log_types::{ - DataCellColumn, SizeBytes as _, TimeRange, COLUMN_NUM_INSTANCES, COLUMN_ROW_ID, - COLUMN_TIMEPOINT, + DataCellColumn, RowId, SizeBytes as _, TimeRange, COLUMN_NUM_INSTANCES, COLUMN_TIMEPOINT, }; -use re_types::ComponentName; +use 
re_types::{ComponentName, Loggable}; use crate::{DataStore, IndexedBucket, IndexedBucketInner, IndexedTable, PersistentIndexedTable}; @@ -193,7 +192,7 @@ impl IndexedBucket { (!col_insert_id.is_empty()) .then(|| (DataStore::insert_id_key(), col_insert_id.len())), // Some((COLUMN_TIMEPOINT.into(), col_time.len())), - Some((COLUMN_ROW_ID.into(), col_row_id.len())), + Some((RowId::name(), col_row_id.len())), Some((COLUMN_NUM_INSTANCES.into(), col_num_instances.len())), ] .into_iter() @@ -274,7 +273,7 @@ impl PersistentIndexedTable { let column_lengths = [ (!col_insert_id.is_empty()) .then(|| (DataStore::insert_id_key(), col_insert_id.len())), // - Some((COLUMN_ROW_ID.into(), col_row_id.len())), + Some((RowId::name(), col_row_id.len())), Some((COLUMN_NUM_INSTANCES.into(), col_num_instances.len())), ] .into_iter() diff --git a/crates/re_format/Cargo.toml b/crates/re_format/Cargo.toml index b502f8f6eb4e..91e6fe82d372 100644 --- a/crates/re_format/Cargo.toml +++ b/crates/re_format/Cargo.toml @@ -17,6 +17,5 @@ all-features = true [dependencies] arrow2.workspace = true -arrow2_convert.workspace = true comfy-table.workspace = true -re_tuid = { workspace = true, features = ["arrow2_convert"] } +re_tuid = { workspace = true, features = ["arrow"] } diff --git a/crates/re_format/src/arrow.rs b/crates/re_format/src/arrow.rs index 13cc69e62dc0..2963dc8771c3 100644 --- a/crates/re_format/src/arrow.rs +++ b/crates/re_format/src/arrow.rs @@ -3,13 +3,12 @@ use std::fmt::Formatter; use arrow2::{ - array::{get_display, Array, ListArray, StructArray}, + array::{get_display, Array, ListArray}, datatypes::{DataType, IntervalUnit, TimeUnit}, }; -use arrow2_convert::deserialize::TryIntoCollection; use comfy_table::{presets, Cell, Table}; -use re_tuid::Tuid; +use re_tuid::{external::re_types::Loggable as _, Tuid}; // --- @@ -39,8 +38,7 @@ pub fn get_custom_display<'a, F: std::fmt::Write + 'a>( if let Some(DataType::Extension(name, _, _)) = datatype { // TODO(#1775): This should be registered 
dynamically. - // NOTE: Can't call `Tuid::name()`, `Component` lives in `re_log_types`. - if name.as_str() == "rerun.tuid" { + if name.as_str() == Tuid::name() { return Box::new(|w, index| { if let Some(tuid) = parse_tuid(array, index) { w.write_fmt(format_args!("{tuid}")) @@ -68,9 +66,8 @@ fn parse_tuid(array: &dyn Array, index: usize) -> Option { // New control columns: it's not a list to begin with! _ => (array.to_boxed(), index), }; - let array = array.as_any().downcast_ref::()?; - let tuids: Vec = TryIntoCollection::try_into_collection(array.to_boxed()).ok()?; + let tuids = Tuid::from_arrow(array.as_ref()).ok()?; tuids.get(index).copied() } @@ -228,7 +225,11 @@ where .map(|(name, data_type)| { Cell::new(format!( "{}\n---\n{}", - name.replace("rerun.components.", "").replace("rerun.", ""), + name.trim_start_matches("rerun.archetypes.") + .trim_start_matches("rerun.components.") + .trim_start_matches("rerun.datatypes.") + .trim_start_matches("rerun.controls.") + .trim_start_matches("rerun."), DisplayDataType(data_type.clone()) )) }); diff --git a/crates/re_log_types/Cargo.toml b/crates/re_log_types/Cargo.toml index 9e0181aca145..e8a658a628fd 100644 --- a/crates/re_log_types/Cargo.toml +++ b/crates/re_log_types/Cargo.toml @@ -37,7 +37,7 @@ re_format.workspace = true re_log.workspace = true re_string_interner.workspace = true re_tracing.workspace = true -re_tuid = { workspace = true, features = ["arrow2_convert"] } +re_tuid = { workspace = true, features = ["arrow"] } re_types = { workspace = true, features = ["image"] } # External diff --git a/crates/re_log_types/src/data_row.rs b/crates/re_log_types/src/data_row.rs index 939e55dae58b..918e41b594b4 100644 --- a/crates/re_log_types/src/data_row.rs +++ b/crates/re_log_types/src/data_row.rs @@ -106,21 +106,8 @@ impl SizeBytes for DataCellRow { // --- /// A unique ID for a [`DataRow`]. 
-#[derive( - Debug, - Clone, - Copy, - PartialEq, - Eq, - PartialOrd, - Ord, - Hash, - arrow2_convert::ArrowField, - arrow2_convert::ArrowSerialize, - arrow2_convert::ArrowDeserialize, -)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] #[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))] -#[arrow_field(transparent)] pub struct RowId(pub(crate) re_tuid::Tuid); impl std::fmt::Display for RowId { @@ -170,6 +157,8 @@ impl std::ops::DerefMut for RowId { } } +re_tuid::delegate_arrow_tuid!(RowId as "rerun.controls.RowId"); + /// A row's worth of data, i.e. an event: a list of [`DataCell`]s associated with an auto-generated /// `RowId`, a user-specified [`TimePoint`] and [`EntityPath`], and an expected number of /// instances. diff --git a/crates/re_log_types/src/data_table.rs b/crates/re_log_types/src/data_table.rs index 9acdab868d2f..23020cc71ffe 100644 --- a/crates/re_log_types/src/data_table.rs +++ b/crates/re_log_types/src/data_table.rs @@ -1,6 +1,6 @@ use std::collections::BTreeMap; -use re_types::ComponentName; +use re_types::{ComponentName, Loggable}; use ahash::HashMap; use itertools::{izip, Itertools as _}; @@ -36,6 +36,12 @@ pub enum DataTableError { #[error("Could not serialize/deserialize component instances to/from Arrow: {0}")] Arrow(#[from] arrow2::error::Error), + #[error("Could not serialize component instances to/from Arrow: {0}")] + Serialization(#[from] re_types::SerializationError), + + #[error("Could not deserialize component instances to/from Arrow: {0}")] + Deserialization(#[from] re_types::DeserializationError), + // Needed to handle TryFrom -> T #[error("Infallible")] Unreachable(#[from] std::convert::Infallible), @@ -128,21 +134,8 @@ impl SizeBytes for DataCellColumn { // --- /// A unique ID for a [`DataTable`]. 
-#[derive( - Debug, - Clone, - Copy, - PartialEq, - Eq, - PartialOrd, - Ord, - Hash, - arrow2_convert::ArrowField, - arrow2_convert::ArrowSerialize, - arrow2_convert::ArrowDeserialize, -)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] #[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))] -#[arrow_field(transparent)] pub struct TableId(pub(crate) re_tuid::Tuid); impl std::fmt::Display for TableId { @@ -183,6 +176,8 @@ impl std::ops::DerefMut for TableId { } } +re_tuid::delegate_arrow_tuid!(TableId as "rerun.controls.TableId"); + /// A sparse table's worth of data, i.e. a batch of events: a collection of [`DataRow`]s. /// This is the top-level layer in our data model. /// @@ -557,7 +552,6 @@ use arrow2_convert::{ // TODO(#1696): Those names should come from the datatypes themselves. pub const COLUMN_INSERT_ID: &str = "rerun.insert_id"; -pub const COLUMN_ROW_ID: &str = "rerun.row_id"; pub const COLUMN_TIMEPOINT: &str = "rerun.timepoint"; pub const COLUMN_ENTITY_PATH: &str = "rerun.entity_path"; pub const COLUMN_NUM_INSTANCES: &str = "rerun.num_instances"; @@ -566,7 +560,6 @@ pub const METADATA_KIND: &str = "rerun.kind"; pub const METADATA_KIND_DATA: &str = "data"; pub const METADATA_KIND_CONTROL: &str = "control"; pub const METADATA_KIND_TIME: &str = "time"; -pub const METADATA_TABLE_ID: &str = "rerun.table_id"; impl DataTable { /// Serializes the entire table into an arrow payload and schema. 
@@ -669,13 +662,12 @@ impl DataTable { let mut schema = Schema::default(); let mut columns = Vec::new(); - let (row_id_field, row_id_column) = - Self::serialize_control_column(COLUMN_ROW_ID, col_row_id)?; + let (row_id_field, row_id_column) = Self::serialize_control_column(col_row_id)?; schema.fields.push(row_id_field); columns.push(row_id_column); let (entity_path_field, entity_path_column) = - Self::serialize_control_column(COLUMN_ENTITY_PATH, col_entity_path)?; + Self::serialize_control_column_legacy(COLUMN_ENTITY_PATH, col_entity_path)?; schema.fields.push(entity_path_field); columns.push(entity_path_column); @@ -687,13 +679,39 @@ impl DataTable { schema.fields.push(num_instances_field); columns.push(num_instances_column); - schema.metadata = [(METADATA_TABLE_ID.into(), table_id.to_string())].into(); + schema.metadata = [(TableId::name().to_string(), table_id.to_string())].into(); Ok((schema, columns)) } /// Serializes a single control column: an iterable of dense arrow-like data. - pub fn serialize_control_column + 'static>( + pub fn serialize_control_column<'a, C: re_types::Component + 'a>( + values: &'a [C], + ) -> DataTableResult<(Field, Box)> + where + std::borrow::Cow<'a, C>: std::convert::From<&'a C>, + { + re_tracing::profile_function!(); + + let data: Box = C::to_arrow(values)?; + + // TODO(#3360): rethink our extension and metadata usage + let mut field = C::arrow_field() + .with_metadata([(METADATA_KIND.to_owned(), METADATA_KIND_CONTROL.to_owned())].into()); + + // TODO(#3360): rethink our extension and metadata usage + if let DataType::Extension(name, _, _) = data.data_type() { + field + .metadata + .extend([("ARROW:extension:name".to_owned(), name.clone())]); + } + + Ok((field, data)) + } + + /// Serializes a single control column: an iterable of dense arrow-like data. 
+ // TODO(#3741): remove once arrow2_convert is fully gone + pub fn serialize_control_column_legacy + 'static>( name: &str, values: &[C], ) -> DataTableResult<(Field, Box)> { @@ -922,8 +940,12 @@ impl DataTable { }; // NOTE: the unwrappings cannot fail since control_index() makes sure the index is valid - let col_row_id = - (&**chunk.get(control_index(COLUMN_ROW_ID)?).unwrap()).try_into_collection()?; + let col_row_id = RowId::from_arrow( + chunk + .get(control_index(RowId::name().as_str())?) + .unwrap() + .as_ref(), + )?; let col_entity_path = (&**chunk.get(control_index(COLUMN_ENTITY_PATH)?).unwrap()).try_into_collection()?; // TODO(#3741): This is unnecessarily slow… @@ -956,7 +978,7 @@ impl DataTable { Ok(Self { table_id, - col_row_id, + col_row_id: col_row_id.into(), col_timelines, col_entity_path, col_num_instances, diff --git a/crates/re_log_types/src/lib.rs b/crates/re_log_types/src/lib.rs index 6f97bcb09aae..2161b4562602 100644 --- a/crates/re_log_types/src/lib.rs +++ b/crates/re_log_types/src/lib.rs @@ -47,8 +47,8 @@ pub use self::data_row::{ pub use self::data_table::{ DataCellColumn, DataCellOptVec, DataTable, DataTableError, DataTableResult, EntityPathVec, ErasedTimeVec, NumInstancesVec, RowIdVec, TableId, TimePointVec, COLUMN_ENTITY_PATH, - COLUMN_INSERT_ID, COLUMN_NUM_INSTANCES, COLUMN_ROW_ID, COLUMN_TIMEPOINT, METADATA_KIND, - METADATA_KIND_CONTROL, METADATA_KIND_DATA, + COLUMN_INSERT_ID, COLUMN_NUM_INSTANCES, COLUMN_TIMEPOINT, METADATA_KIND, METADATA_KIND_CONTROL, + METADATA_KIND_DATA, }; pub use self::index::*; pub use self::path::*; diff --git a/crates/re_tuid/Cargo.toml b/crates/re_tuid/Cargo.toml index 9941c6b88ca2..0b423be02549 100644 --- a/crates/re_tuid/Cargo.toml +++ b/crates/re_tuid/Cargo.toml @@ -19,8 +19,8 @@ all-features = true [features] default = [] -## Enable converting Tuid to arrow2 -arrow2_convert = ["dep:arrow2", "dep:arrow2_convert"] +## Enable (de)serialization using Arrow. 
+arrow = ["dep:re_types", "dep:arrow2", "dep:thiserror"] ## Enable (de)serialization using serde. serde = ["dep:serde"] @@ -32,10 +32,13 @@ getrandom = "0.2" once_cell.workspace = true web-time.workspace = true -# Optional dependencies: -arrow2 = { workspace = true, optional = true } # used by arrow2_convert -arrow2_convert = { workspace = true, optional = true } +# Optional dependencies + +re_types = { workspace = true, optional = true } + +arrow2 = { workspace = true, optional = true } serde = { version = "1", features = ["derive"], optional = true } +thiserror = { workspace = true, optional = true } [dev-dependencies] criterion = "0.5" diff --git a/crates/re_tuid/benches/bench_tuid.rs b/crates/re_tuid/benches/bench_tuid.rs index 61f070775549..86c2566ebe2d 100644 --- a/crates/re_tuid/benches/bench_tuid.rs +++ b/crates/re_tuid/benches/bench_tuid.rs @@ -8,5 +8,43 @@ fn bench_tuid(c: &mut Criterion) { }); } -criterion_group!(benches, bench_tuid,); +#[cfg(feature = "arrow")] +fn bench_arrow(c: &mut Criterion) { + use arrow2::array::Array; + use re_types::Loggable as _; + + for elem_count in [1, 1000] { + { + let mut group = c.benchmark_group(format!("arrow/serialize/elem_count={elem_count}")); + group.throughput(criterion::Throughput::Elements(elem_count)); + + let tuids = vec![re_tuid::Tuid::random(); elem_count as usize]; + + group.bench_function("arrow2", |b| { + b.iter(|| { + let data: Box = re_tuid::Tuid::to_arrow(tuids.clone()).unwrap(); + criterion::black_box(data) + }); + }); + } + + { + let mut group = c.benchmark_group(format!("arrow/deserialize/elem_count={elem_count}")); + group.throughput(criterion::Throughput::Elements(elem_count)); + + let data: Box = + re_tuid::Tuid::to_arrow(vec![re_tuid::Tuid::random(); elem_count as usize]) + .unwrap(); + + group.bench_function("arrow2", |b| { + b.iter(|| { + let tuids = re_tuid::Tuid::from_arrow(data.as_ref()).unwrap(); + criterion::black_box(tuids) + }); + }); + } + } +} + +criterion_group!(benches, bench_tuid, 
bench_arrow); criterion_main!(benches); diff --git a/crates/re_tuid/src/arrow.rs b/crates/re_tuid/src/arrow.rs new file mode 100644 index 000000000000..6137965e4437 --- /dev/null +++ b/crates/re_tuid/src/arrow.rs @@ -0,0 +1,214 @@ +use arrow2::{ + array::{StructArray, UInt64Array}, + datatypes::{DataType, Field}, +}; +use re_types::Loggable; + +use crate::Tuid; + +// --- + +impl<'a> From for ::std::borrow::Cow<'a, Tuid> { + #[inline] + fn from(value: Tuid) -> Self { + std::borrow::Cow::Owned(value) + } +} + +impl<'a> From<&'a Tuid> for ::std::borrow::Cow<'a, Tuid> { + #[inline] + fn from(value: &'a Tuid) -> Self { + std::borrow::Cow::Borrowed(value) + } +} + +impl Loggable for Tuid { + type Name = re_types::ComponentName; + + #[inline] + fn name() -> Self::Name { + "rerun.controls.TUID".into() + } + + #[inline] + fn arrow_datatype() -> arrow2::datatypes::DataType { + DataType::Struct(vec![ + Field::new("time_ns", DataType::UInt64, false), + Field::new("inc", DataType::UInt64, false), + ]) + } + + fn to_arrow_opt<'a>( + _data: impl IntoIterator>>>, + ) -> re_types::SerializationResult> + where + Self: 'a, + { + Err(re_types::SerializationError::not_implemented( + Self::name(), + "TUIDs are never nullable, use `to_arrow()` instead", + )) + } + + #[inline] + fn to_arrow<'a>( + data: impl IntoIterator>>, + ) -> re_types::SerializationResult> + where + Self: 'a, + { + let (time_ns_values, inc_values): (Vec<_>, Vec<_>) = data + .into_iter() + .map(Into::into) + .map(|tuid| (tuid.time_ns, tuid.inc)) + .unzip(); + + let values = vec![ + UInt64Array::from_vec(time_ns_values).boxed(), + UInt64Array::from_vec(inc_values).boxed(), + ]; + let validity = None; + + // TODO(#3360): We use the extended type here because we rely on it for formatting. 
+ Ok(StructArray::new(Self::extended_arrow_datatype(), values, validity).boxed()) + } + + fn from_arrow( + array: &dyn ::arrow2::array::Array, + ) -> re_types::DeserializationResult> { + let expected_datatype = Self::arrow_datatype(); + let actual_datatype = array.data_type().to_logical_type(); + if actual_datatype != &expected_datatype { + return Err(re_types::DeserializationError::datatype_mismatch( + expected_datatype, + actual_datatype.clone(), + )); + } + + // NOTE: Unwrap is safe everywhere below, datatype is checked above. + // NOTE: We don't even look at the validity, our datatype says we don't care. + + let array = array.as_any().downcast_ref::().unwrap(); + + // TODO(cmc): Can we rely on the fields ordering from the datatype? I would assume not + // since we generally cannot rely on anything when it comes to arrow… + // If we could, that would also impact our codegen deserialization path. + let (time_ns_index, inc_index) = { + let mut time_ns_index = None; + let mut inc_index = None; + for (i, field) in array.fields().iter().enumerate() { + if field.name == "time_ns" { + time_ns_index = Some(i); + } else if field.name == "inc" { + inc_index = Some(i); + } + } + (time_ns_index.unwrap(), inc_index.unwrap()) + }; + + let get_buffer = |field_index: usize| { + array.values()[field_index] + .as_any() + .downcast_ref::() + .unwrap() + .values() + }; + + let time_ns_buffer = get_buffer(time_ns_index); + let inc_buffer = get_buffer(inc_index); + + if time_ns_buffer.len() != inc_buffer.len() { + return Err( + re_types::DeserializationError::mismatched_struct_field_lengths( + "time_ns", + time_ns_buffer.len(), + "inc", + inc_buffer.len(), + ), + ); + } + + Ok(time_ns_buffer + .iter() + .copied() + .zip(inc_buffer.iter().copied()) + .map(|(time_ns, inc)| Self { time_ns, inc }) + .collect()) + } +} + +/// Implements [`re_types::Component`] for any given type that is a simple wrapper +/// (newtype) around a [`Tuid`]. 
+/// +/// Usage: +/// ```ignore +/// re_tuid::delegate_arrow_tuid!(RowId as "rerun.controls.RowId"); +/// ``` +#[macro_export] +macro_rules! delegate_arrow_tuid { + ($typ:ident as $fqname:expr) => { + impl<'a> From<$typ> for ::std::borrow::Cow<'a, $typ> { + #[inline] + fn from(value: $typ) -> Self { + ::std::borrow::Cow::Owned(value) + } + } + + impl<'a> From<&'a $typ> for ::std::borrow::Cow<'a, $typ> { + #[inline] + fn from(value: &'a $typ) -> Self { + ::std::borrow::Cow::Borrowed(value) + } + } + + impl ::re_types::Loggable for $typ { + type Name = ::re_types::ComponentName; + + #[inline] + fn name() -> Self::Name { + $fqname.into() + } + + #[inline] + fn arrow_datatype() -> ::arrow2::datatypes::DataType { + $crate::Tuid::arrow_datatype() + } + + #[inline] + fn to_arrow_opt<'a>( + _values: impl IntoIterator>>>, + ) -> re_types::SerializationResult> + where + Self: 'a, + { + Err(re_types::SerializationError::not_implemented( + Self::name(), + "TUIDs are never nullable, use `to_arrow()` instead", + )) + } + + #[inline] + fn to_arrow<'a>( + values: impl IntoIterator>>, + ) -> re_types::SerializationResult> { + let values = values.into_iter().map(|value| { + let value: ::std::borrow::Cow<'a, Self> = value.into(); + value.into_owned() + }); + <$crate::Tuid as ::re_types::Loggable>::to_arrow( + values.into_iter().map(|$typ(tuid)| tuid), + ) + } + + #[inline] + fn from_arrow( + array: &dyn arrow2::array::Array, + ) -> re_types::DeserializationResult> { + Ok(<$crate::Tuid as ::re_types::Loggable>::from_arrow(array)? + .into_iter() + .map(|tuid| Self(tuid)) + .collect()) + } + } + }; +} diff --git a/crates/re_tuid/src/lib.rs b/crates/re_tuid/src/lib.rs index b78595d4a01b..169f5ac152ef 100644 --- a/crates/re_tuid/src/lib.rs +++ b/crates/re_tuid/src/lib.rs @@ -7,10 +7,6 @@ //!
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] -#[cfg_attr( - feature = "arrow2_convert", - derive(arrow2_convert::ArrowSerialize, arrow2_convert::ArrowDeserialize) -)] #[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))] pub struct Tuid { /// Approximate nanoseconds since epoch. @@ -21,21 +17,12 @@ pub struct Tuid { inc: u64, } -#[cfg(feature = "arrow2_convert")] -arrow2_convert::arrow_enable_vec_for_type!(Tuid); +#[cfg(feature = "arrow")] +pub mod arrow; -// TODO(#3741): shouldn't have to write this manually -#[cfg(feature = "arrow2_convert")] -impl arrow2_convert::field::ArrowField for Tuid { - type Type = Self; - - fn data_type() -> arrow2::datatypes::DataType { - let datatype = arrow2::datatypes::DataType::Struct(<[_]>::into_vec(Box::new([ - ::field("time_ns"), - ::field("inc"), - ]))); - arrow2::datatypes::DataType::Extension("rerun.tuid".into(), Box::new(datatype), None) - } +pub mod external { + #[cfg(feature = "arrow")] + pub use re_types; } impl std::fmt::Display for Tuid { diff --git a/crates/re_types/Cargo.toml b/crates/re_types/Cargo.toml index d55a60a38b28..a5ef9c05fb07 100644 --- a/crates/re_types/Cargo.toml +++ b/crates/re_types/Cargo.toml @@ -68,7 +68,6 @@ arrow2 = { workspace = true, features = [ "io_print", "compute_concatenate", ] } -arrow2_convert.workspace = true backtrace.workspace = true bytemuck = { workspace = true, features = ["derive", "extern_crate_alloc"] } document-features.workspace = true diff --git a/crates/re_types/src/components/text_ext.rs b/crates/re_types/src/components/text_ext.rs index e671c653ea61..d262379bf37e 100644 --- a/crates/re_types/src/components/text_ext.rs +++ b/crates/re_types/src/components/text_ext.rs @@ -27,13 +27,3 @@ impl std::borrow::Borrow for Text { self.as_str() } } - -// TODO(emilk): required to use with `range_entity_with_primary`. 
remove once the migration is over -impl arrow2_convert::field::ArrowField for Text { - type Type = Self; - - fn data_type() -> arrow2::datatypes::DataType { - use crate::Loggable as _; - Self::arrow_field().data_type - } -} diff --git a/crates/re_types/src/components/text_log_level_ext.rs b/crates/re_types/src/components/text_log_level_ext.rs index 20a0b5aaba29..94c619cd7ac4 100644 --- a/crates/re_types/src/components/text_log_level_ext.rs +++ b/crates/re_types/src/components/text_log_level_ext.rs @@ -45,13 +45,3 @@ impl std::borrow::Borrow for TextLogLevel { self.as_str() } } - -// TODO(emilk): required to use with `range_entity_with_primary`. remove once the migration is over -impl arrow2_convert::field::ArrowField for TextLogLevel { - type Type = Self; - - fn data_type() -> arrow2::datatypes::DataType { - use crate::Loggable as _; - Self::arrow_field().data_type - } -} diff --git a/crates/re_types/src/result.rs b/crates/re_types/src/result.rs index 294f36a7fd42..e02ca596495c 100644 --- a/crates/re_types/src/result.rs +++ b/crates/re_types/src/result.rs @@ -22,6 +22,13 @@ pub enum SerializationError { #[error("arrow2-convert serialization Failed: {0}")] ArrowConvertFailure(String), + + #[error("{fqname} doesn't support serialization: {reason}")] + NotImplemented { + fqname: String, + reason: String, + backtrace: _Backtrace, + }, } impl std::fmt::Debug for SerializationError { @@ -49,12 +56,22 @@ impl SerializationError { } } + #[inline] + pub fn not_implemented(fqname: impl AsRef, reason: impl AsRef) -> Self { + Self::NotImplemented { + fqname: fqname.as_ref().into(), + reason: reason.as_ref().into(), + backtrace: ::backtrace::Backtrace::new_unresolved(), + } + } + /// Returns the _unresolved_ backtrace associated with this error, if it exists. /// /// Call `resolve()` on the returned [`_Backtrace`] to resolve it (costly!). pub fn backtrace(&self) -> Option<_Backtrace> { match self { - Self::MissingExtensionMetadata { backtrace, ..
} => Some(backtrace.clone()), + Self::MissingExtensionMetadata { backtrace, .. } + | Self::NotImplemented { backtrace, .. } => Some(backtrace.clone()), SerializationError::Context { .. } | SerializationError::ArrowConvertFailure(_) => None, } } @@ -95,6 +112,17 @@ pub enum DeserializationError { backtrace: _Backtrace, }, + #[error( + "Found {field1_length} {field1_name:?} values vs. {field2_length} {field2_name:?} values" + )] + MismatchedStructFieldLengths { + field1_name: String, + field1_length: usize, + field2_name: String, + field2_length: usize, + backtrace: _Backtrace, + }, + #[error("Expected union arm {arm_name:?} (#{arm_index}) to be present in {datatype:#?}")] MissingUnionArm { datatype: ::arrow2::datatypes::DataType, @@ -173,6 +201,22 @@ impl DeserializationError { } } + #[inline] + pub fn mismatched_struct_field_lengths( + field1_name: impl AsRef, + field1_length: usize, + field2_name: impl AsRef, + field2_length: usize, + ) -> Self { + Self::MismatchedStructFieldLengths { + field1_name: field1_name.as_ref().into(), + field1_length, + field2_name: field2_name.as_ref().into(), + field2_length, + backtrace: ::backtrace::Backtrace::new_unresolved(), + } + } + #[inline] pub fn missing_union_arm( datatype: arrow2::datatypes::DataType, @@ -230,6 +274,7 @@ impl DeserializationError { } => source.backtrace(), DeserializationError::NotImplemented { backtrace, .. } | DeserializationError::MissingStructField { backtrace, .. } + | DeserializationError::MismatchedStructFieldLengths { backtrace, .. } | DeserializationError::MissingUnionArm { backtrace, .. } | DeserializationError::MissingData { backtrace } | DeserializationError::MissingComponent { backtrace, .. }