diff --git a/Cargo.lock b/Cargo.lock
index 2528ad7d681c..6a11c06acdda 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -6342,6 +6342,7 @@ name = "re_types_core"
 version = "0.21.0-alpha.1+dev"
 dependencies = [
  "anyhow",
+ "arrow",
  "backtrace",
  "bytemuck",
  "criterion",
diff --git a/crates/store/re_chunk/src/batcher.rs b/crates/store/re_chunk/src/batcher.rs
index 3e09dbb81c2d..f155fa5a0802 100644
--- a/crates/store/re_chunk/src/batcher.rs
+++ b/crates/store/re_chunk/src/batcher.rs
@@ -5,7 +5,7 @@ use std::{
     time::{Duration, Instant},
 };
 
-use arrow2::array::{Array as ArrowArray, PrimitiveArray as ArrowPrimitiveArray};
+use arrow2::array::{Array as Arrow2Array, PrimitiveArray as Arrow2PrimitiveArray};
 use crossbeam::channel::{Receiver, Sender};
 use nohash_hasher::IntMap;
 
@@ -679,14 +679,14 @@ pub struct PendingRow {
     /// The component data.
     ///
     /// Each array is a single component, i.e. _not_ a list array.
-    pub components: BTreeMap<ComponentName, Box<dyn ArrowArray>>,
+    pub components: BTreeMap<ComponentName, Box<dyn Arrow2Array>>,
 }
 
 impl PendingRow {
     #[inline]
     pub fn new(
         timepoint: TimePoint,
-        components: BTreeMap<ComponentName, Box<dyn ArrowArray>>,
+        components: BTreeMap<ComponentName, Box<dyn Arrow2Array>>,
     ) -> Self {
         Self {
             row_id: RowId::new(),
@@ -726,7 +726,7 @@ impl PendingRow {
         let timelines = timepoint
             .into_iter()
             .map(|(timeline, time)| {
-                let times = ArrowPrimitiveArray::<i64>::from_vec(vec![time.as_i64()]);
+                let times = Arrow2PrimitiveArray::<i64>::from_vec(vec![time.as_i64()]);
                 let time_column = TimeColumn::new(Some(true), timeline, times);
                 (timeline, time_column)
             })
@@ -799,7 +799,7 @@ impl PendingRow {
             re_tracing::profile_scope!("iterate per timeline set");
 
             // Then we split the micro batches even further -- one sub-batch per unique set of datatypes.
-            let mut per_datatype_set: IntMap<u64 /* ArrowDatatype set */, Vec<PendingRow>> =
+            let mut per_datatype_set: IntMap<u64 /* Arrow2Datatype set */, Vec<PendingRow>> =
                 Default::default();
             {
                 re_tracing::profile_scope!("compute datatype sets");
@@ -826,7 +826,7 @@ impl PendingRow {
                 // Create all the logical list arrays that we're going to need, accounting for the
                 // possibility of sparse components in the data.
-                let mut all_components: IntMap<ComponentName, Vec<Option<&dyn ArrowArray>>> =
+                let mut all_components: IntMap<ComponentName, Vec<Option<&dyn Arrow2Array>>> =
                     IntMap::default();
                 for row in &rows {
                     for component_name in row.components.keys() {
@@ -893,7 +893,7 @@ impl PendingRow {
                             arrays.push(
                                 row_components
                                     .get(component_name)
-                                    .map(|array| &**array as &dyn ArrowArray),
+                                    .map(|array| &**array as &dyn Arrow2Array),
                             );
                         }
                     }
@@ -967,7 +967,7 @@ impl PendingTimeColumn {
 
         TimeColumn {
             timeline,
-            times: ArrowPrimitiveArray::<i64>::from_vec(times).to(timeline.datatype()),
+            times: Arrow2PrimitiveArray::<i64>::from_vec(times).to(timeline.datatype()),
             is_sorted,
             time_range,
         }
@@ -1047,7 +1047,7 @@ mod tests {
             TimeColumn::new(
                 Some(true),
                 timeline1,
-                ArrowPrimitiveArray::from_vec(vec![42, 43, 44]),
+                Arrow2PrimitiveArray::from_vec(vec![42, 43, 44]),
             ),
         )];
         let expected_components = [(
@@ -1203,7 +1203,7 @@ mod tests {
             TimeColumn::new(
                 Some(true),
                 timeline1,
-                ArrowPrimitiveArray::from_vec(vec![42, 44]),
+                Arrow2PrimitiveArray::from_vec(vec![42, 44]),
             ),
         )];
         let expected_components = [(
@@ -1231,7 +1231,7 @@ mod tests {
             TimeColumn::new(
                 Some(true),
                 timeline1,
-                ArrowPrimitiveArray::from_vec(vec![43]),
+                Arrow2PrimitiveArray::from_vec(vec![43]),
             ),
         )];
         let expected_components = [(
@@ -1318,7 +1318,7 @@ mod tests {
             TimeColumn::new(
                 Some(true),
                 timeline1,
-                ArrowPrimitiveArray::from_vec(vec![42]),
+                Arrow2PrimitiveArray::from_vec(vec![42]),
             ),
         )];
         let expected_components = [(
@@ -1347,7 +1347,7 @@ mod tests {
             TimeColumn::new(
                 Some(true),
                 timeline1,
-                ArrowPrimitiveArray::from_vec(vec![43, 44]),
+                Arrow2PrimitiveArray::from_vec(vec![43, 44]),
             ),
         ),
         (
@@ -1355,7 +1355,7 @@ mod tests {
             TimeColumn::new(
                 Some(true),
                 timeline2,
-                ArrowPrimitiveArray::from_vec(vec![1000, 1001]),
+                Arrow2PrimitiveArray::from_vec(vec![1000, 1001]),
             ),
         ),
         ];
@@ -1439,7 +1439,7 @@ mod tests {
             TimeColumn::new(
                 Some(true),
                 timeline1,
-                ArrowPrimitiveArray::from_vec(vec![42, 44]),
+                Arrow2PrimitiveArray::from_vec(vec![42, 44]),
             ),
         )];
         let expected_components = [(
@@ -1467,7 +1467,7 @@ mod tests {
             TimeColumn::new(
                 Some(true),
                 timeline1,
-                ArrowPrimitiveArray::from_vec(vec![43]),
+                Arrow2PrimitiveArray::from_vec(vec![43]),
             ),
         )];
         let expected_components = [(
@@ -1569,7 +1569,7 @@ mod tests {
             TimeColumn::new(
                 Some(false),
                 timeline1,
-                ArrowPrimitiveArray::from_vec(vec![45, 42, 43, 44]),
+                Arrow2PrimitiveArray::from_vec(vec![45, 42, 43, 44]),
             ),
         ),
         (
@@ -1577,7 +1577,7 @@ mod tests {
             TimeColumn::new(
                 Some(false),
                 timeline2,
-                ArrowPrimitiveArray::from_vec(vec![1003, 1000, 1001, 1002]),
+                Arrow2PrimitiveArray::from_vec(vec![1003, 1000, 1001, 1002]),
             ),
         ),
         ];
@@ -1683,7 +1683,7 @@ mod tests {
             TimeColumn::new(
                 Some(false),
                 timeline1,
-                ArrowPrimitiveArray::from_vec(vec![45, 42, 43]),
+                Arrow2PrimitiveArray::from_vec(vec![45, 42, 43]),
             ),
         ),
         (
@@ -1691,7 +1691,7 @@ mod tests {
             TimeColumn::new(
                 Some(false),
                 timeline2,
-                ArrowPrimitiveArray::from_vec(vec![1003, 1000, 1001]),
+                Arrow2PrimitiveArray::from_vec(vec![1003, 1000, 1001]),
             ),
         ),
         ];
@@ -1722,7 +1722,7 @@ mod tests {
             TimeColumn::new(
                 Some(true),
                 timeline1,
-                ArrowPrimitiveArray::from_vec(vec![44]),
+                Arrow2PrimitiveArray::from_vec(vec![44]),
             ),
         ),
         (
@@ -1730,7 +1730,7 @@ mod tests {
             TimeColumn::new(
                 Some(true),
                 timeline2,
-                ArrowPrimitiveArray::from_vec(vec![1002]),
+                Arrow2PrimitiveArray::from_vec(vec![1002]),
             ),
         ),
         ];
diff --git a/crates/store/re_chunk/src/builder.rs b/crates/store/re_chunk/src/builder.rs
index 9da99f0924e8..306ccc369f1f 100644
--- a/crates/store/re_chunk/src/builder.rs
+++ b/crates/store/re_chunk/src/builder.rs
@@ -1,8 +1,8 @@
 use std::collections::BTreeMap;
 
 use arrow2::{
-    array::{Array as ArrowArray, PrimitiveArray as ArrowPrimitiveArray},
-    datatypes::DataType as ArrowDatatype,
+    array::{Array as Arrow2Array, PrimitiveArray as Arrow2PrimitiveArray},
+    datatypes::DataType as Arrow2Datatype,
 };
 use itertools::Itertools;
 
@@ -23,7 +23,7 @@ pub struct ChunkBuilder {
     row_ids: Vec<RowId>,
     timelines: BTreeMap<Timeline, TimeColumnBuilder>,
-    components: BTreeMap<ComponentName, Vec<Option<Box<dyn ArrowArray>>>>,
+    components: BTreeMap<ComponentName, Vec<Option<Box<dyn Arrow2Array>>>>,
 }
 
 impl Chunk {
@@ -63,7 +63,7 @@ impl ChunkBuilder {
         mut self,
         row_id: RowId,
         timepoint: impl Into<TimePoint>,
-        components: impl IntoIterator<Item = (ComponentName, Option<Box<dyn ArrowArray>>)>,
+        components: impl IntoIterator<Item = (ComponentName, Option<Box<dyn Arrow2Array>>)>,
     ) -> Self {
         let components = components.into_iter().collect_vec();
 
@@ -107,7 +107,7 @@ impl ChunkBuilder {
         self,
         row_id: RowId,
         timepoint: impl Into<TimePoint>,
-        components: impl IntoIterator<Item = (ComponentName, Box<dyn ArrowArray>)>,
+        components: impl IntoIterator<Item = (ComponentName, Box<dyn Arrow2Array>)>,
     ) -> Self {
         self.with_sparse_row(
             row_id,
@@ -258,7 +258,7 @@ impl ChunkBuilder {
     #[inline]
     pub fn build_with_datatypes(
         self,
-        datatypes: &IntMap<ComponentName, ArrowDatatype>,
+        datatypes: &IntMap<ComponentName, Arrow2Datatype>,
     ) -> ChunkResult<Chunk> {
         let Self {
             id,
@@ -343,7 +343,7 @@ impl TimeColumnBuilder {
     pub fn build(self) -> TimeColumn {
         let Self { timeline, times } = self;
 
-        let times = ArrowPrimitiveArray::<i64>::from_vec(times).to(timeline.datatype());
+        let times = Arrow2PrimitiveArray::<i64>::from_vec(times).to(timeline.datatype());
         TimeColumn::new(None, timeline, times)
     }
 }
diff --git a/crates/store/re_chunk/src/chunk.rs b/crates/store/re_chunk/src/chunk.rs
index 26847f60e28f..4bbfaf833c12 100644
--- a/crates/store/re_chunk/src/chunk.rs
+++ b/crates/store/re_chunk/src/chunk.rs
@@ -5,8 +5,8 @@ use std::{
 
 use arrow2::{
     array::{
-        Array as ArrowArray, ListArray as ArrowListArray, PrimitiveArray as ArrowPrimitiveArray,
-        StructArray as ArrowStructArray,
+        Array as Arrow2Array, ListArray as Arrow2ListArray, PrimitiveArray as Arrow2PrimitiveArray,
+        StructArray as Arrow2StructArray,
     },
     Either,
 };
@@ -74,7 +74,7 @@ pub struct Chunk {
     pub(crate) is_sorted: bool,
 
     /// The respective [`RowId`]s for each row of data.
-    pub(crate) row_ids: ArrowStructArray,
+    pub(crate) row_ids: Arrow2StructArray,
 
     /// The time columns.
     ///
@@ -90,7 +90,7 @@ pub struct Chunk {
     /// Sparse so that we can e.g. log a `Position` at one timestamp but not a `Color`.
     //
     // TODO(#6576): support non-list based columns?
-    pub(crate) components: BTreeMap<ComponentName, ArrowListArray<i32>>,
+    pub(crate) components: BTreeMap<ComponentName, Arrow2ListArray<i32>>,
 }
 
 impl PartialEq for Chunk {
@@ -269,7 +269,7 @@ impl Chunk {
             // Unwrap: native RowIds cannot fail to serialize.
             .unwrap()
             .as_any()
-            .downcast_ref::<ArrowStructArray>()
+            .downcast_ref::<Arrow2StructArray>()
             // Unwrap: RowId schema is known in advance to be a struct array -- always.
             .unwrap()
             .clone();
@@ -299,7 +299,7 @@ impl Chunk {
             // Unwrap: native RowIds cannot fail to serialize.
             .unwrap()
             .as_any()
-            .downcast_ref::<ArrowStructArray>()
+            .downcast_ref::<Arrow2StructArray>()
             // Unwrap: RowId schema is known in advance to be a struct array -- always.
             .unwrap()
             .clone();
@@ -555,7 +555,7 @@ pub struct TimeColumn {
     /// * This is guaranteed to always be dense, because chunks are split anytime a timeline is
    ///   added or removed.
     /// * This cannot ever contain `TimeInt::STATIC`, since static data doesn't even have timelines.
-    pub(crate) times: ArrowPrimitiveArray<i64>,
+    pub(crate) times: Arrow2PrimitiveArray<i64>,
 
     /// Is [`Self::times`] sorted?
     ///
@@ -583,9 +583,9 @@ impl Chunk {
         id: ChunkId,
         entity_path: EntityPath,
         is_sorted: Option<bool>,
-        row_ids: ArrowStructArray,
+        row_ids: Arrow2StructArray,
         timelines: BTreeMap<Timeline, TimeColumn>,
-        components: BTreeMap<ComponentName, ArrowListArray<i32>>,
+        components: BTreeMap<ComponentName, Arrow2ListArray<i32>>,
     ) -> ChunkResult<Self> {
         let mut chunk = Self {
             id,
@@ -619,7 +619,7 @@ impl Chunk {
         is_sorted: Option<bool>,
         row_ids: &[RowId],
         timelines: BTreeMap<Timeline, TimeColumn>,
-        components: BTreeMap<ComponentName, ArrowListArray<i32>>,
+        components: BTreeMap<ComponentName, Arrow2ListArray<i32>>,
     ) -> ChunkResult<Self> {
         re_tracing::profile_function!();
         let row_ids = row_ids
@@ -629,7 +629,7 @@ impl Chunk {
                 reason: format!("RowIds failed to serialize: {err}"),
             })?
             .as_any()
-            .downcast_ref::<ArrowStructArray>()
+            .downcast_ref::<Arrow2StructArray>()
             // NOTE: impossible, but better safe than sorry.
             .ok_or_else(|| ChunkError::Malformed {
                 reason: "RowIds failed to downcast".to_owned(),
            })?
@@ -650,7 +650,7 @@ impl Chunk {
         id: ChunkId,
         entity_path: EntityPath,
         timelines: BTreeMap<Timeline, TimeColumn>,
-        components: BTreeMap<ComponentName, ArrowListArray<i32>>,
+        components: BTreeMap<ComponentName, Arrow2ListArray<i32>>,
     ) -> ChunkResult<Self> {
         let count = components
             .iter()
@@ -680,8 +680,8 @@ impl Chunk {
         id: ChunkId,
         entity_path: EntityPath,
         is_sorted: Option<bool>,
-        row_ids: ArrowStructArray,
-        components: BTreeMap<ComponentName, ArrowListArray<i32>>,
+        row_ids: Arrow2StructArray,
+        components: BTreeMap<ComponentName, Arrow2ListArray<i32>>,
     ) -> ChunkResult<Self> {
         Self::new(
             id,
@@ -700,13 +700,13 @@ impl Chunk {
             entity_path,
             heap_size_bytes: Default::default(),
             is_sorted: true,
-            row_ids: ArrowStructArray::new_empty(RowId::arrow2_datatype()),
+            row_ids: Arrow2StructArray::new_empty(RowId::arrow2_datatype()),
             timelines: Default::default(),
             components: Default::default(),
         }
     }
 
-    /// Unconditionally inserts an [`ArrowListArray`] as a component column.
+    /// Unconditionally inserts an [`Arrow2ListArray`] as a component column.
     ///
     /// Removes and replaces the column if it already exists.
     ///
@@ -715,7 +715,7 @@ impl Chunk {
     pub fn add_component(
         &mut self,
         component_name: ComponentName,
-        list_array: ArrowListArray<i32>,
+        list_array: Arrow2ListArray<i32>,
     ) -> ChunkResult<()> {
         self.components.insert(component_name, list_array);
         self.sanity_check()
     }
@@ -744,7 +744,7 @@ impl TimeColumn {
     pub fn new(
         is_sorted: Option<bool>,
         timeline: Timeline,
-        times: ArrowPrimitiveArray<i64>,
+        times: Arrow2PrimitiveArray<i64>,
     ) -> Self {
         re_tracing::profile_function_if!(1000 < times.len(), format!("{} times", times.len()));
 
@@ -811,7 +811,7 @@ impl TimeColumn {
         Self::new(
             None,
             Timeline::new_sequence(name.into()),
-            ArrowPrimitiveArray::<i64>::from_vec(time_vec),
+            Arrow2PrimitiveArray::<i64>::from_vec(time_vec),
         )
     }
 
@@ -838,7 +838,7 @@ impl TimeColumn {
         Self::new(
             None,
             Timeline::new_temporal(name.into()),
-            ArrowPrimitiveArray::<i64>::from_vec(time_vec),
+            Arrow2PrimitiveArray::<i64>::from_vec(time_vec),
         )
     }
 
@@ -868,7 +868,7 @@ impl TimeColumn {
         Self::new(
             None,
             Timeline::new_temporal(name.into()),
-            ArrowPrimitiveArray::<i64>::from_vec(time_vec),
+            Arrow2PrimitiveArray::<i64>::from_vec(time_vec),
         )
     }
 }
@@ -929,13 +929,13 @@ impl Chunk {
     }
 
     #[inline]
-    pub fn row_ids_array(&self) -> &ArrowStructArray {
+    pub fn row_ids_array(&self) -> &Arrow2StructArray {
         &self.row_ids
     }
 
     /// Returns the [`RowId`]s in their raw-est form: a tuple of (times, counters) arrays.
     #[inline]
-    pub fn row_ids_raw(&self) -> (&ArrowPrimitiveArray<u64>, &ArrowPrimitiveArray<u64>) {
+    pub fn row_ids_raw(&self) -> (&Arrow2PrimitiveArray<u64>, &Arrow2PrimitiveArray<u64>) {
         let [times, counters] = self.row_ids.values() else {
             panic!("RowIds are corrupt -- this should be impossible (sanity checked)");
         };
@@ -943,13 +943,13 @@ impl Chunk {
         #[allow(clippy::unwrap_used)]
         let times = times
             .as_any()
-            .downcast_ref::<ArrowPrimitiveArray<u64>>()
+            .downcast_ref::<Arrow2PrimitiveArray<u64>>()
             .unwrap(); // sanity checked
 
         #[allow(clippy::unwrap_used)]
         let counters = counters
             .as_any()
-            .downcast_ref::<ArrowPrimitiveArray<u64>>()
+            .downcast_ref::<Arrow2PrimitiveArray<u64>>()
             .unwrap(); // sanity checked
 
         (times, counters)
@@ -1055,7 +1055,7 @@ impl Chunk {
     }
 
     #[inline]
-    pub fn components(&self) -> &BTreeMap<ComponentName, ArrowListArray<i32>> {
+    pub fn components(&self) -> &BTreeMap<ComponentName, Arrow2ListArray<i32>> {
         &self.components
     }
 
@@ -1098,7 +1098,7 @@ impl TimeColumn {
     }
 
     #[inline]
-    pub fn times_array(&self) -> &ArrowPrimitiveArray<i64> {
+    pub fn times_array(&self) -> &Arrow2PrimitiveArray<i64> {
         &self.times
     }
 
@@ -1137,7 +1137,7 @@ impl TimeColumn {
     // TODO(cmc): This needs to be stored in chunk metadata and transported across IPC.
     pub fn time_range_per_component(
         &self,
-        components: &BTreeMap<ComponentName, ArrowListArray<i32>>,
+        components: &BTreeMap<ComponentName, Arrow2ListArray<i32>>,
     ) -> BTreeMap<ComponentName, ResolvedTimeRange> {
         let times = self.times_raw();
         components
diff --git a/crates/store/re_chunk/src/helpers.rs b/crates/store/re_chunk/src/helpers.rs
index 43f89fab2bd7..c27613137e6d 100644
--- a/crates/store/re_chunk/src/helpers.rs
+++ b/crates/store/re_chunk/src/helpers.rs
@@ -1,6 +1,6 @@
 use std::sync::Arc;
 
-use arrow2::array::Array as ArrowArray;
+use arrow2::array::Array as Arrow2Array;
 
 use re_log_types::{TimeInt, Timeline};
 use re_types_core::{Component, ComponentName, SizeBytes};
@@ -20,7 +20,7 @@ impl Chunk {
         &self,
         component_name: &ComponentName,
         row_index: usize,
-    ) -> Option<ChunkResult<Box<dyn ArrowArray>>> {
+    ) -> Option<ChunkResult<Box<dyn Arrow2Array>>> {
         self.components.get(component_name).and_then(|list_array| {
             if list_array.len() > row_index {
                 list_array
@@ -63,7 +63,7 @@ impl Chunk {
         component_name: &ComponentName,
         row_index: usize,
         instance_index: usize,
-    ) -> Option<ChunkResult<Box<dyn ArrowArray>>> {
+    ) -> Option<ChunkResult<Box<dyn Arrow2Array>>> {
         let res = self.component_batch_raw(component_name, row_index)?;
 
         let array = match res {
@@ -116,7 +116,7 @@ impl Chunk {
         &self,
         component_name: &ComponentName,
         row_index: usize,
-    ) -> Option<ChunkResult<Box<dyn ArrowArray>>> {
+    ) -> Option<ChunkResult<Box<dyn Arrow2Array>>> {
         let res = self.component_batch_raw(component_name, row_index)?;
 
         let array = match res {
@@ -256,7 +256,7 @@ impl UnitChunkShared {
     pub fn component_batch_raw(
         &self,
         component_name: &ComponentName,
-    ) -> Option<Box<dyn ArrowArray>> {
+    ) -> Option<Box<dyn Arrow2Array>> {
         debug_assert!(self.num_rows() == 1);
         self.components
             .get(component_name)
@@ -282,7 +282,7 @@ impl UnitChunkShared {
         &self,
         component_name: &ComponentName,
         instance_index: usize,
-    ) -> Option<ChunkResult<Box<dyn ArrowArray>>> {
+    ) -> Option<ChunkResult<Box<dyn Arrow2Array>>> {
         let array = self.component_batch_raw(component_name)?;
         if array.len() > instance_index {
             Some(Ok(array.sliced(instance_index, 1)))
@@ -325,7 +325,7 @@ impl UnitChunkShared {
     pub fn component_mono_raw(
         &self,
         component_name: &ComponentName,
-    ) -> Option<ChunkResult<Box<dyn ArrowArray>>> {
+    ) -> Option<ChunkResult<Box<dyn Arrow2Array>>> {
         let array = self.component_batch_raw(component_name)?;
         if array.len() == 1 {
             Some(Ok(array.sliced(0, 1)))
diff --git a/crates/store/re_chunk/src/iter.rs b/crates/store/re_chunk/src/iter.rs
index 6668dcb47a46..647e311aefa8 100644
--- a/crates/store/re_chunk/src/iter.rs
+++ b/crates/store/re_chunk/src/iter.rs
@@ -2,9 +2,9 @@ use std::sync::Arc;
 
 use arrow2::{
     array::{
-        Array as ArrowArray, FixedSizeListArray as ArrowFixedSizeListArray,
-        ListArray as ArrowListArray, PrimitiveArray as ArrowPrimitiveArray,
-        Utf8Array as ArrowUtf8Array,
+        Array as Arrow2Array, FixedSizeListArray as Arrow2FixedSizeListArray,
+        ListArray as Arrow2ListArray, PrimitiveArray as Arrow2PrimitiveArray,
+        Utf8Array as Arrow2Utf8Array,
     },
     Either,
 };
@@ -212,7 +212,7 @@ impl Chunk {
     pub fn iter_component_arrays(
         &self,
         component_name: &ComponentName,
-    ) -> impl Iterator<Item = Box<dyn ArrowArray>> + '_ {
+    ) -> impl Iterator<Item = Box<dyn Arrow2Array>> + '_ {
         let Some(list_array) = self.components.get(component_name) else {
             return Either::Left(std::iter::empty());
         };
@@ -246,7 +246,7 @@ impl Chunk {
         let Some(values) = list_array
             .values()
             .as_any()
-            .downcast_ref::<ArrowPrimitiveArray<T>>()
+            .downcast_ref::<Arrow2PrimitiveArray<T>>()
         else {
             if cfg!(debug_assertions) {
                 panic!("downcast failed for {component_name}, data discarded");
@@ -291,7 +291,7 @@ impl Chunk {
         let Some(fixed_size_list_array) = list_array
             .values()
             .as_any()
-            .downcast_ref::<ArrowFixedSizeListArray>()
+            .downcast_ref::<Arrow2FixedSizeListArray>()
         else {
             if cfg!(debug_assertions) {
                 panic!("downcast failed for {component_name}, data discarded");
@@ -304,7 +304,7 @@ impl Chunk {
         let Some(values) = fixed_size_list_array
             .values()
             .as_any()
-            .downcast_ref::<ArrowPrimitiveArray<T>>()
+            .downcast_ref::<Arrow2PrimitiveArray<T>>()
         else {
             if cfg!(debug_assertions) {
                 panic!("downcast failed for {component_name}, data discarded");
@@ -353,7 +353,7 @@ impl Chunk {
         let Some(inner_list_array) = list_array
             .values()
             .as_any()
-            .downcast_ref::<ArrowListArray<i32>>()
+            .downcast_ref::<Arrow2ListArray<i32>>()
         else {
             if cfg!(debug_assertions) {
                 panic!("downcast failed for {component_name}, data discarded");
@@ -369,7 +369,7 @@ impl Chunk {
         let Some(fixed_size_list_array) = inner_list_array
             .values()
             .as_any()
-            .downcast_ref::<ArrowFixedSizeListArray>()
+            .downcast_ref::<Arrow2FixedSizeListArray>()
         else {
             if cfg!(debug_assertions) {
                 panic!("downcast failed for {component_name}, data discarded");
@@ -382,7 +382,7 @@ impl Chunk {
         let Some(values) = fixed_size_list_array
             .values()
             .as_any()
-            .downcast_ref::<ArrowPrimitiveArray<T>>()
+            .downcast_ref::<Arrow2PrimitiveArray<T>>()
         else {
             if cfg!(debug_assertions) {
                 panic!("downcast failed for {component_name}, data discarded");
@@ -435,7 +435,7 @@ impl Chunk {
         let Some(utf8_array) = list_array
             .values()
             .as_any()
-            .downcast_ref::<ArrowUtf8Array<i32>>()
+            .downcast_ref::<Arrow2Utf8Array<i32>>()
         else {
             if cfg!(debug_assertions) {
                 panic!("downcast failed for {component_name}, data discarded");
@@ -486,7 +486,7 @@ impl Chunk {
         let Some(inner_list_array) = list_array
             .values()
             .as_any()
-            .downcast_ref::<ArrowListArray<i32>>()
+            .downcast_ref::<Arrow2ListArray<i32>>()
         else {
             if cfg!(debug_assertions) {
                 panic!("downcast failed for {component_name}, data discarded");
@@ -499,7 +499,7 @@ impl Chunk {
         let Some(values) = inner_list_array
             .values()
             .as_any()
-            .downcast_ref::<ArrowPrimitiveArray<T>>()
+            .downcast_ref::<Arrow2PrimitiveArray<T>>()
         else {
             if cfg!(debug_assertions) {
                 panic!("downcast failed for {component_name}, data discarded");
diff --git a/crates/store/re_chunk/src/latest_at.rs b/crates/store/re_chunk/src/latest_at.rs
index e4427a7e3a81..c9aa831f6085 100644
--- a/crates/store/re_chunk/src/latest_at.rs
+++ b/crates/store/re_chunk/src/latest_at.rs
@@ -1,4 +1,4 @@
-use arrow2::array::Array as ArrowArray;
+use arrow2::array::Array as Arrow2Array;
 
 use re_log_types::{TimeInt, Timeline};
 use re_types_core::ComponentName;
diff --git a/crates/store/re_chunk/src/lib.rs b/crates/store/re_chunk/src/lib.rs
index d04bada784bb..f239dc416266 100644
--- a/crates/store/re_chunk/src/lib.rs
+++ b/crates/store/re_chunk/src/lib.rs
@@ -40,7 +40,7 @@ pub use self::batcher::{
 
 // Re-exports
 #[doc(no_inline)]
-pub use arrow2::array::Array as ArrowArray;
+pub use arrow2::array::Array as Arrow2Array;
 #[doc(no_inline)]
 pub use re_log_types::{EntityPath, TimeInt, TimePoint, Timeline, TimelineName};
 #[doc(no_inline)]
diff --git a/crates/store/re_chunk/src/merge.rs b/crates/store/re_chunk/src/merge.rs
index 4ba2ceba41a9..c4f0e2e8c4b4 100644
--- a/crates/store/re_chunk/src/merge.rs
+++ b/crates/store/re_chunk/src/merge.rs
@@ -1,8 +1,8 @@
 use std::collections::BTreeMap;
 
 use arrow2::array::{
-    Array as ArrowArray, ListArray as ArrowListArray, PrimitiveArray as ArrowPrimitiveArray,
-    StructArray as ArrowStructArray,
+    Array as Arrow2Array, ListArray as Arrow2ListArray, PrimitiveArray as Arrow2PrimitiveArray,
+    StructArray as Arrow2StructArray,
 };
 use itertools::{izip, Itertools};
 
@@ -52,7 +52,7 @@ impl Chunk {
             // concatenating 2 RowId arrays must yield another RowId array
             row_ids
                 .as_any()
-                .downcast_ref::<ArrowStructArray>()
+                .downcast_ref::<Arrow2StructArray>()
                 .unwrap()
                 .clone()
         };
@@ -90,7 +90,7 @@ impl Chunk {
                     crate::util::concat_arrays(&[lhs_list_array, rhs_list_array]).ok()?;
                 let list_array = list_array
                     .as_any()
-                    .downcast_ref::<ArrowListArray<i32>>()?
+                    .downcast_ref::<Arrow2ListArray<i32>>()?
                     .clone();
 
                 Some((*component_name, list_array))
@@ -132,7 +132,7 @@ impl Chunk {
                     crate::util::concat_arrays(&[lhs_list_array, rhs_list_array]).ok()?;
                 let list_array = list_array
                     .as_any()
-                    .downcast_ref::<ArrowListArray<i32>>()?
+                    .downcast_ref::<Arrow2ListArray<i32>>()?
                     .clone();
 
                 Some((*component_name, list_array))
@@ -257,7 +257,7 @@ impl TimeColumn {
         let times = crate::util::concat_arrays(&[&self.times, &rhs.times]).ok()?;
         let times = times
             .as_any()
-            .downcast_ref::<ArrowPrimitiveArray<i64>>()?
+            .downcast_ref::<Arrow2PrimitiveArray<i64>>()?
             .clone();
 
         Some(Self {
diff --git a/crates/store/re_chunk/src/shuffle.rs b/crates/store/re_chunk/src/shuffle.rs
index c0df428cb24b..8600964576da 100644
--- a/crates/store/re_chunk/src/shuffle.rs
+++ b/crates/store/re_chunk/src/shuffle.rs
@@ -1,6 +1,6 @@
 use arrow2::{
     array::{
-        Array as ArrowArray, ListArray as ArrowListArray, PrimitiveArray as ArrowPrimitiveArray,
+        Array as Arrow2Array, ListArray as Arrow2ListArray, PrimitiveArray as Arrow2PrimitiveArray,
         StructArray,
     },
     offset::Offsets as ArrowOffsets,
@@ -212,8 +212,8 @@ impl Chunk {
             sorted_counters[to] = counters[from];
         }
 
-        let times = ArrowPrimitiveArray::<u64>::from_vec(sorted_times).boxed();
-        let counters = ArrowPrimitiveArray::<u64>::from_vec(sorted_counters).boxed();
+        let times = Arrow2PrimitiveArray::<u64>::from_vec(sorted_times).boxed();
+        let counters = Arrow2PrimitiveArray::<u64>::from_vec(sorted_counters).boxed();
 
         self.row_ids = StructArray::new(
             self.row_ids.data_type().clone(),
@@ -250,7 +250,7 @@ impl Chunk {
             }
 
             *is_sorted = sorted.windows(2).all(|times| times[0] <= times[1]);
-            *times = ArrowPrimitiveArray::<i64>::from_vec(sorted).to(timeline.datatype());
+            *times = Arrow2PrimitiveArray::<i64>::from_vec(sorted).to(timeline.datatype());
         }
     }
 
@@ -267,7 +267,7 @@ impl Chunk {
                 .collect_vec();
             let sorted_arrays = sorted_arrays
                 .iter()
-                .map(|array| &**array as &dyn ArrowArray)
+                .map(|array| &**array as &dyn Arrow2Array)
                 .collect_vec();
 
             let datatype = original.data_type().clone();
@@ -281,7 +281,7 @@ impl Chunk {
                 .validity()
                 .map(|validity| swaps.iter().map(|&from| validity.get_bit(from)).collect());
 
-            *original = ArrowListArray::<i32>::new(datatype, offsets.into(), values, validity);
+            *original = Arrow2ListArray::<i32>::new(datatype, offsets.into(), values, validity);
         }
     }
diff --git a/crates/store/re_chunk/src/slice.rs b/crates/store/re_chunk/src/slice.rs
index 8e7f3fe4a427..6a3728c3fcef 100644
--- a/crates/store/re_chunk/src/slice.rs
+++ b/crates/store/re_chunk/src/slice.rs
@@ -1,6 +1,6 @@
 use arrow2::array::{
-    Array as ArrowArray, BooleanArray as ArrowBooleanArray, ListArray as ArrowListArray,
-    PrimitiveArray as ArrowPrimitiveArray, StructArray as ArrowStructArray,
+    Array as Arrow2Array, BooleanArray as Arrow2BooleanArray, ListArray as Arrow2ListArray,
+    PrimitiveArray as Arrow2PrimitiveArray, StructArray as Arrow2StructArray,
 };
 use itertools::Itertools;
 
@@ -26,7 +26,7 @@ impl Chunk {
         &self,
         row_id: RowId,
         component_name: &ComponentName,
-    ) -> Option<Box<dyn ArrowArray>> {
+    ) -> Option<Box<dyn Arrow2Array>> {
         let list_array = self.components.get(component_name)?;
 
         if self.is_sorted() {
@@ -352,7 +352,7 @@ impl Chunk {
         let mask = validity.iter().collect_vec();
         let is_sorted = *is_sorted || (mask.iter().filter(|&&b| b).count() < 2);
 
-        let validity_filter = ArrowBooleanArray::from_slice(mask);
+        let validity_filter = Arrow2BooleanArray::from_slice(mask);
 
         let mut chunk = Self {
             id: *id,
@@ -378,7 +378,7 @@ impl Chunk {
                     filtered
                         .with_validity(None)
                         .as_any()
-                        .downcast_ref::<ArrowListArray<i32>>()
+                        .downcast_ref::<Arrow2ListArray<i32>>()
                         // Unwrap: cannot possibly fail -- going from a ListArray back to a ListArray.
                         .unwrap()
                         .clone()
@@ -440,7 +440,7 @@ impl Chunk {
             entity_path: entity_path.clone(),
             heap_size_bytes: Default::default(),
             is_sorted: true,
-            row_ids: ArrowStructArray::new_empty(row_ids.data_type().clone()),
+            row_ids: Arrow2StructArray::new_empty(row_ids.data_type().clone()),
             timelines: timelines
                 .iter()
                 .map(|(&timeline, time_column)| (timeline, time_column.emptied()))
@@ -450,7 +450,7 @@ impl Chunk {
                 .map(|(&component_name, list_array)| {
                     (
                         component_name,
-                        ArrowListArray::new_empty(list_array.data_type().clone()),
+                        Arrow2ListArray::new_empty(list_array.data_type().clone()),
                     )
                 })
                 .collect(),
@@ -528,7 +528,7 @@ impl Chunk {
                     i.saturating_sub(1) as i32
                 })
                 .collect_vec();
-            ArrowPrimitiveArray::<i32>::from_vec(indices)
+            Arrow2PrimitiveArray::<i32>::from_vec(indices)
         };
 
         let chunk = Self {
@@ -575,7 +575,7 @@ impl Chunk {
     /// WARNING: the returned chunk has the same old [`crate::ChunkId`]! Change it with [`Self::with_id`].
     #[must_use]
     #[inline]
-    pub fn filtered(&self, filter: &ArrowBooleanArray) -> Option<Self> {
+    pub fn filtered(&self, filter: &Arrow2BooleanArray) -> Option<Self> {
         let Self {
             id,
             entity_path,
@@ -661,7 +661,7 @@ impl Chunk {
     /// WARNING: the returned chunk has the same old [`crate::ChunkId`]! Change it with [`Self::with_id`].
     #[must_use]
     #[inline]
-    pub fn taken(&self, indices: &ArrowPrimitiveArray<i32>) -> Self {
+    pub fn taken(&self, indices: &Arrow2PrimitiveArray<i32>) -> Self {
         let Self {
             id,
             entity_path,
@@ -785,7 +785,7 @@ impl TimeColumn {
         Self::new(
             is_sorted_opt,
             *timeline,
-            ArrowPrimitiveArray::sliced(times.clone(), index, len),
+            Arrow2PrimitiveArray::sliced(times.clone(), index, len),
         )
     }
 
@@ -804,7 +804,7 @@ impl TimeColumn {
         Self::new(
             Some(true),
             *timeline,
-            ArrowPrimitiveArray::new_empty(times.data_type().clone()),
+            Arrow2PrimitiveArray::new_empty(times.data_type().clone()),
         )
     }
 
@@ -812,7 +812,7 @@ impl TimeColumn {
     /// Runs a [filter] compute kernel on the time data with the specified `filter`.
     ///
     /// [filter]: arrow2::compute::filter::filter
     #[inline]
-    pub(crate) fn filtered(&self, filter: &ArrowBooleanArray) -> Self {
+    pub(crate) fn filtered(&self, filter: &Arrow2BooleanArray) -> Self {
         let Self {
             timeline,
             times,
@@ -850,7 +850,7 @@ impl TimeColumn {
     /// Runs a [take] compute kernel on the time data with the specified `indices`.
     ///
     /// [take]: arrow2::compute::take::take
     #[inline]
-    pub(crate) fn taken(&self, indices: &ArrowPrimitiveArray<i32>) -> Self {
+    pub(crate) fn taken(&self, indices: &Arrow2PrimitiveArray<i32>) -> Self {
         let Self {
             timeline,
             times,
@@ -1363,7 +1363,7 @@ mod tests {
 
         // basic
         {
-            let filter = ArrowBooleanArray::from_slice(
+            let filter = Arrow2BooleanArray::from_slice(
                 (0..chunk.num_rows()).map(|i| i % 2 == 0).collect_vec(),
             );
             let got = chunk.filtered(&filter).unwrap();
@@ -1394,7 +1394,7 @@ mod tests {
 
         // shorter
         {
-            let filter = ArrowBooleanArray::from_slice(
+            let filter = Arrow2BooleanArray::from_slice(
                 (0..chunk.num_rows() / 2).map(|i| i % 2 == 0).collect_vec(),
             );
             let got = chunk.filtered(&filter);
@@ -1403,7 +1403,7 @@ mod tests {
 
         // longer
         {
-            let filter = ArrowBooleanArray::from_slice(
+            let filter = Arrow2BooleanArray::from_slice(
                 (0..chunk.num_rows() * 2).map(|i| i % 2 == 0).collect_vec(),
             );
             let got = chunk.filtered(&filter);
@@ -1508,7 +1508,7 @@ mod tests {
 
         // basic
         {
-            let indices = ArrowPrimitiveArray::<i32>::from_vec(
+            let indices = Arrow2PrimitiveArray::<i32>::from_vec(
                 (0..chunk.num_rows() as i32)
                     .filter(|i| i % 2 == 0)
                     .collect_vec(),
@@ -1541,7 +1541,7 @@ mod tests {
 
         // repeated
         {
-            let indices = ArrowPrimitiveArray::<i32>::from_vec(
+            let indices = Arrow2PrimitiveArray::<i32>::from_vec(
                 std::iter::repeat(2i32)
                     .take(chunk.num_rows() * 2)
                     .collect_vec(),
diff --git a/crates/store/re_chunk/src/transport.rs b/crates/store/re_chunk/src/transport.rs
index c00753caa556..3b49d06d58c0 100644
--- a/crates/store/re_chunk/src/transport.rs
+++ b/crates/store/re_chunk/src/transport.rs
@@ -2,13 +2,13 @@ use std::collections::BTreeMap;
 
 use arrow2::{
     array::{
-        Array as ArrowArray, ListArray, PrimitiveArray as ArrowPrimitiveArray,
-        StructArray as ArrowStructArray,
+        Array as Arrow2Array, ListArray, PrimitiveArray as Arrow2PrimitiveArray,
+        StructArray as Arrow2StructArray,
     },
-    chunk::Chunk as ArrowChunk,
+    chunk::Chunk as Arrow2Chunk,
     datatypes::{
-        DataType as ArrowDatatype, Field as ArrowField, Metadata as ArrowMetadata,
-        Schema as ArrowSchema, TimeUnit as ArrowTimeUnit,
+        DataType as Arrow2Datatype, Field as ArrowField, Metadata as Arrow2Metadata,
+        Schema as Arrow2Schema, TimeUnit as ArrowTimeUnit,
     },
 };
 
@@ -36,10 +36,10 @@ pub struct TransportChunk {
     ///
     /// Take a look at the `TransportChunk::CHUNK_METADATA_*` and `TransportChunk::FIELD_METADATA_*`
     /// constants for more information about available metadata.
-    pub schema: ArrowSchema,
+    pub schema: Arrow2Schema,
 
     /// All the control, time and component data.
-    pub data: ArrowChunk<Box<dyn ArrowArray>>,
+    pub data: Arrow2Chunk<Box<dyn Arrow2Array>>,
 }
 
 impl std::fmt::Display for TransportChunk {
@@ -57,38 +57,38 @@ impl std::fmt::Display for TransportChunk {
 // TODO(#6572): Relying on Arrow's native schema metadata feature is bound to fail, we need to
 // switch to something more powerful asap.
 impl TransportChunk {
-    /// The key used to identify a Rerun [`ChunkId`] in chunk-level [`ArrowSchema`] metadata.
+    /// The key used to identify a Rerun [`ChunkId`] in chunk-level [`Arrow2Schema`] metadata.
     pub const CHUNK_METADATA_KEY_ID: &'static str = "rerun.id";
 
-    /// The key used to identify a Rerun [`EntityPath`] in chunk-level [`ArrowSchema`] metadata.
+    /// The key used to identify a Rerun [`EntityPath`] in chunk-level [`Arrow2Schema`] metadata.
     pub const CHUNK_METADATA_KEY_ENTITY_PATH: &'static str = "rerun.entity_path";
 
     /// The key used to identify the size in bytes of the data, once loaded in memory, in chunk-level
-    /// [`ArrowSchema`] metadata.
+    /// [`Arrow2Schema`] metadata.
     pub const CHUNK_METADATA_KEY_HEAP_SIZE_BYTES: &'static str = "rerun.heap_size_bytes";
 
-    /// The marker used to identify whether a chunk is sorted in chunk-level [`ArrowSchema`] metadata.
+    /// The marker used to identify whether a chunk is sorted in chunk-level [`Arrow2Schema`] metadata.
     ///
     /// The associated value is irrelevant -- if this marker is present, then it is true.
     ///
     /// Chunks are ascendingly sorted by their `RowId` column.
     pub const CHUNK_METADATA_MARKER_IS_SORTED_BY_ROW_ID: &'static str = "rerun.is_sorted";
 
-    /// The key used to identify the kind of a Rerun column in field-level [`ArrowSchema`] metadata.
+    /// The key used to identify the kind of a Rerun column in field-level [`Arrow2Schema`] metadata.
     ///
     /// That is: control columns (e.g. `row_id`), time columns or component columns.
     pub const FIELD_METADATA_KEY_KIND: &'static str = "rerun.kind";
 
-    /// The value used to identify a Rerun time column in field-level [`ArrowSchema`] metadata.
+    /// The value used to identify a Rerun time column in field-level [`Arrow2Schema`] metadata.
     pub const FIELD_METADATA_VALUE_KIND_TIME: &'static str = "time";
 
-    /// The value used to identify a Rerun control column in field-level [`ArrowSchema`] metadata.
+    /// The value used to identify a Rerun control column in field-level [`Arrow2Schema`] metadata.
     pub const FIELD_METADATA_VALUE_KIND_CONTROL: &'static str = "control";
 
-    /// The value used to identify a Rerun data column in field-level [`ArrowSchema`] metadata.
+    /// The value used to identify a Rerun data column in field-level [`Arrow2Schema`] metadata.
     pub const FIELD_METADATA_VALUE_KIND_DATA: &'static str = "data";
 
-    /// The marker used to identify whether a column is sorted in field-level [`ArrowSchema`] metadata.
+    /// The marker used to identify whether a column is sorted in field-level [`Arrow2Schema`] metadata.
     ///
     /// The associated value is irrelevant -- if this marker is present, then it is true.
     ///
@@ -98,9 +98,9 @@ impl TransportChunk {
     pub const FIELD_METADATA_MARKER_IS_SORTED_BY_TIME: &'static str =
         Self::CHUNK_METADATA_MARKER_IS_SORTED_BY_ROW_ID;
 
-    /// Returns the appropriate chunk-level [`ArrowSchema`] metadata for a Rerun [`ChunkId`].
+    /// Returns the appropriate chunk-level [`Arrow2Schema`] metadata for a Rerun [`ChunkId`].
     #[inline]
-    pub fn chunk_metadata_id(id: ChunkId) -> ArrowMetadata {
+    pub fn chunk_metadata_id(id: ChunkId) -> Arrow2Metadata {
         [
             (
                 Self::CHUNK_METADATA_KEY_ID.to_owned(),
@@ -110,9 +110,9 @@ impl TransportChunk {
         .into()
     }
 
-    /// Returns the appropriate chunk-level [`ArrowSchema`] metadata for the in-memory size in bytes.
+    /// Returns the appropriate chunk-level [`Arrow2Schema`] metadata for the in-memory size in bytes.
     #[inline]
-    pub fn chunk_metadata_heap_size_bytes(heap_size_bytes: u64) -> ArrowMetadata {
+    pub fn chunk_metadata_heap_size_bytes(heap_size_bytes: u64) -> Arrow2Metadata {
         [
             (
                 Self::CHUNK_METADATA_KEY_HEAP_SIZE_BYTES.to_owned(),
@@ -122,9 +122,9 @@ impl TransportChunk {
         .into()
     }
 
-    /// Returns the appropriate chunk-level [`ArrowSchema`] metadata for a Rerun [`EntityPath`].
+    /// Returns the appropriate chunk-level [`Arrow2Schema`] metadata for a Rerun [`EntityPath`].
     #[inline]
-    pub fn chunk_metadata_entity_path(entity_path: &EntityPath) -> ArrowMetadata {
+    pub fn chunk_metadata_entity_path(entity_path: &EntityPath) -> Arrow2Metadata {
         [
             (
                 Self::CHUNK_METADATA_KEY_ENTITY_PATH.to_owned(),
@@ -134,9 +134,9 @@ impl TransportChunk {
         .into()
     }
 
-    /// Returns the appropriate chunk-level [`ArrowSchema`] metadata for an `IS_SORTED` marker.
+    /// Returns the appropriate chunk-level [`Arrow2Schema`] metadata for an `IS_SORTED` marker.
     #[inline]
-    pub fn chunk_metadata_is_sorted() -> ArrowMetadata {
+    pub fn chunk_metadata_is_sorted() -> Arrow2Metadata {
         [
             (
                 Self::CHUNK_METADATA_MARKER_IS_SORTED_BY_ROW_ID.to_owned(),
@@ -146,9 +146,9 @@ impl TransportChunk {
         .into()
     }
 
-    /// Returns the appropriate field-level [`ArrowSchema`] metadata for a Rerun time column.
+    /// Returns the appropriate field-level [`Arrow2Schema`] metadata for a Rerun time column.
     #[inline]
-    pub fn field_metadata_time_column() -> ArrowMetadata {
+    pub fn field_metadata_time_column() -> Arrow2Metadata {
         [
             (
                 Self::FIELD_METADATA_KEY_KIND.to_owned(),
@@ -158,9 +158,9 @@ impl TransportChunk {
         .into()
     }
 
-    /// Returns the appropriate field-level [`ArrowSchema`] metadata for a Rerun control column.
+    /// Returns the appropriate field-level [`Arrow2Schema`] metadata for a Rerun control column.
     #[inline]
-    pub fn field_metadata_control_column() -> ArrowMetadata {
+    pub fn field_metadata_control_column() -> Arrow2Metadata {
         [
             (
                 Self::FIELD_METADATA_KEY_KIND.to_owned(),
@@ -170,9 +170,9 @@ impl TransportChunk {
         .into()
     }
 
-    /// Returns the appropriate field-level [`ArrowSchema`] metadata for a Rerun data column.
+    /// Returns the appropriate field-level [`Arrow2Schema`] metadata for a Rerun data column.
     #[inline]
-    pub fn field_metadata_data_column() -> ArrowMetadata {
+    pub fn field_metadata_data_column() -> Arrow2Metadata {
         [
             (
                 Self::FIELD_METADATA_KEY_KIND.to_owned(),
@@ -182,9 +182,9 @@ impl TransportChunk {
         .into()
     }
 
-    /// Returns the appropriate field-level [`ArrowSchema`] metadata for an `IS_SORTED` marker.
+    /// Returns the appropriate field-level [`Arrow2Schema`] metadata for an `IS_SORTED` marker.
     #[inline]
-    pub fn field_metadata_is_sorted() -> ArrowMetadata {
+    pub fn field_metadata_is_sorted() -> Arrow2Metadata {
         [
             (
                 Self::FIELD_METADATA_MARKER_IS_SORTED_BY_TIME.to_owned(),
@@ -259,7 +259,7 @@ impl TransportChunk {
     pub fn columns<'a>(
         &'a self,
         kind: &'a str,
-    ) -> impl Iterator<Item = (&'a ArrowField, &'a Box<dyn ArrowArray>)> + 'a {
+    ) -> impl Iterator<Item = (&'a ArrowField, &'a Box<dyn Arrow2Array>)> + 'a {
         self.schema
             .fields
             .iter()
@@ -273,7 +273,7 @@ impl TransportChunk {
     }
 
     #[inline]
-    pub fn all_columns(&self) -> impl Iterator<Item = (&ArrowField, &Box<dyn ArrowArray>)> + '_ {
+    pub fn all_columns(&self) -> impl Iterator<Item = (&ArrowField, &Box<dyn Arrow2Array>)> + '_ {
         self.schema
             .fields
             .iter()
@@ -283,19 +283,19 @@ impl TransportChunk {
 
     /// Iterates all control columns present in this chunk.
     #[inline]
-    pub fn controls(&self) -> impl Iterator<Item = (&ArrowField, &Box<dyn ArrowArray>)> {
+    pub fn controls(&self) -> impl Iterator<Item = (&ArrowField, &Box<dyn Arrow2Array>)> {
         self.columns(Self::FIELD_METADATA_VALUE_KIND_CONTROL)
     }
 
     /// Iterates all data columns present in this chunk.
     #[inline]
-    pub fn components(&self) -> impl Iterator<Item = (&ArrowField, &Box<dyn ArrowArray>)> {
+    pub fn components(&self) -> impl Iterator<Item = (&ArrowField, &Box<dyn Arrow2Array>)> {
         self.columns(Self::FIELD_METADATA_VALUE_KIND_DATA)
     }
 
     /// Iterates all timeline columns present in this chunk.
     #[inline]
-    pub fn timelines(&self) -> impl Iterator<Item = (&ArrowField, &Box<dyn ArrowArray>)> {
+    pub fn timelines(&self) -> impl Iterator<Item = (&ArrowField, &Box<dyn Arrow2Array>)> {
         self.columns(Self::FIELD_METADATA_VALUE_KIND_TIME)
     }
 
@@ -349,7 +349,7 @@ impl Chunk {
             components,
         } = self;
 
-        let mut schema = ArrowSchema::default();
+        let mut schema = Arrow2Schema::default();
         let mut columns = Vec::with_capacity(1 /* row_ids */ + timelines.len() + components.len());
 
         // Chunk-level metadata
@@ -437,7 +437,7 @@ impl Chunk {
 
         Ok(TransportChunk {
             schema,
-            data: ArrowChunk::new(columns),
+            data: Arrow2Chunk::new(columns),
         })
     }
 
@@ -472,7 +472,7 @@ impl Chunk {
 
             row_ids
                 .as_any()
-                .downcast_ref::<ArrowStructArray>()
+                .downcast_ref::<Arrow2StructArray>()
                 .ok_or_else(|| ChunkError::Malformed {
                     reason: format!(
                         "RowId data has the wrong datatype: expected {:?} but got {:?} instead",
@@ -492,8 +492,8 @@ impl Chunk {
         for (field, column) in transport.timelines() {
             // See also [`Timeline::datatype`]
             let timeline = match column.data_type().to_logical_type() {
-                ArrowDatatype::Int64 => Timeline::new_sequence(field.name.as_str()),
-                ArrowDatatype::Timestamp(ArrowTimeUnit::Nanosecond, None) => {
+                Arrow2Datatype::Int64 => Timeline::new_sequence(field.name.as_str()),
+                Arrow2Datatype::Timestamp(ArrowTimeUnit::Nanosecond, None) => {
                     Timeline::new_temporal(field.name.as_str())
                 }
                 _ => {
@@ -509,7 +509,7 @@ impl Chunk {
 
             let times = column
                 .as_any()
-                .downcast_ref::<ArrowPrimitiveArray<i64>>()
+                .downcast_ref::<Arrow2PrimitiveArray<i64>>()
                 .ok_or_else(|| ChunkError::Malformed {
                     reason: format!(
                         "time column '{}' is not deserializable ({:?})",
@@ -653,7 +653,7 @@ mod tests {
             TimeColumn::new(
                 Some(true),
                 timeline1,
-                ArrowPrimitiveArray::<i64>::from_vec(vec![42, 43, 44, 45]),
+                Arrow2PrimitiveArray::<i64>::from_vec(vec![42, 43, 44, 45]),
             ),
         ))
         .collect();
diff --git a/crates/store/re_chunk/src/util.rs b/crates/store/re_chunk/src/util.rs
index dbbe5ffc786b..c8e64b143c03 100644
--- a/crates/store/re_chunk/src/util.rs
+++ b/crates/store/re_chunk/src/util.rs
@@ -1,11 +1,11 @@
 use arrow2::{
     array::{
-        Array as ArrowArray, BooleanArray as ArrowBooleanArray,
+        Array as Arrow2Array, BooleanArray as Arrow2BooleanArray,
         DictionaryArray as ArrowDictionaryArray, ListArray as ArrowListArray,
-        PrimitiveArray as ArrowPrimitiveArray,
+        PrimitiveArray as Arrow2PrimitiveArray,
     },
     bitmap::Bitmap as ArrowBitmap,
-    datatypes::DataType as ArrowDatatype,
+    datatypes::DataType as Arrow2Datatype,
     offset::Offsets as ArrowOffsets,
 };
 use itertools::Itertools;
@@ -42,7 +42,9 @@ pub fn is_list_array_semantically_empty(list_array: &ArrowListArray) -> boo
 ///
 /// Returns `None` if `arrays` is empty.
 #[inline]
-pub fn arrays_to_list_array_opt(arrays: &[Option<&dyn ArrowArray>]) -> Option<ArrowListArray<i32>> {
+pub fn arrays_to_list_array_opt(
+    arrays: &[Option<&dyn Arrow2Array>],
+) -> Option<ArrowListArray<i32>> {
     let datatype = arrays
         .iter()
         .flatten()
@@ -57,8 +59,8 @@ pub fn arrays_to_list_array_opt(arrays: &[Option<&dyn ArrowArray>]) -> Option
 pub fn arrays_to_list_array(
-    array_datatype: ArrowDatatype,
-    arrays: &[Option<&dyn ArrowArray>],
+    array_datatype: Arrow2Datatype,
+    arrays: &[Option<&dyn Arrow2Array>],
 ) -> Option<ArrowListArray<i32>> {
     let arrays_dense = arrays.iter().flatten().copied().collect_vec();
@@ -107,8 +109,8 @@ pub fn arrays_to_list_array(
 // TODO(cmc): A possible improvement would be to pick the smallest key datatype possible based
 // on the cardinality of the input arrays.
 pub fn arrays_to_dictionary<Idx: Copy + Eq>(
-    array_datatype: &ArrowDatatype,
-    arrays: &[Option<(Idx, &dyn ArrowArray)>],
+    array_datatype: &Arrow2Datatype,
+    arrays: &[Option<(Idx, &dyn Arrow2Array)>],
 ) -> Option<ArrowDictionaryArray<i32>> {
     // Dedupe the input arrays based on the given primary key.
     let arrays_dense_deduped = arrays
@@ -160,7 +162,7 @@ pub fn arrays_to_dictionary(
         ArrowListArray::<i32>::new(array_datatype.clone(), offsets.into(), values, None).to_boxed()
     };
 
-    let datatype = ArrowDatatype::Dictionary(
+    let datatype = Arrow2Datatype::Dictionary(
         arrow2::datatypes::IntegerType::Int32,
         std::sync::Arc::new(array_datatype.clone()),
         true, // is_sorted
@@ -170,7 +172,7 @@ pub fn arrays_to_dictionary(
     // unique values.
     ArrowDictionaryArray::try_new(
         datatype,
-        ArrowPrimitiveArray::<i32>::from(keys),
+        Arrow2PrimitiveArray::<i32>::from(keys),
         data.to_boxed(),
     )
     .ok()
@@ -308,7 +310,10 @@ pub fn pad_list_array_front(
 /// Returns a new [`ArrowListArray`] with len `entries`.
 ///
 /// Each entry will be an empty array of the given `child_datatype`.
-pub fn new_list_array_of_empties(child_datatype: ArrowDatatype, len: usize) -> ArrowListArray<i32> {
+pub fn new_list_array_of_empties(
+    child_datatype: Arrow2Datatype,
+    len: usize,
+) -> ArrowListArray<i32> {
     let empty_array = arrow2::array::new_empty_array(child_datatype);
 
     #[allow(clippy::unwrap_used)] // yes, these are indeed lengths
@@ -329,7 +334,7 @@ pub fn new_list_array_of_empties(child_datatype: ArrowDatatype, len: usize) -> A
 /// Returns an error if the arrays don't share the exact same datatype.
 ///
 /// [concatenate]: arrow2::compute::concatenate::concatenate
-pub fn concat_arrays(arrays: &[&dyn ArrowArray]) -> arrow2::error::Result<Box<dyn ArrowArray>> {
+pub fn concat_arrays(arrays: &[&dyn Arrow2Array]) -> arrow2::error::Result<Box<dyn Arrow2Array>> {
     if arrays.len() == 1 {
         return Ok(arrays[0].to_boxed());
     }
@@ -350,7 +355,7 @@ pub fn concat_arrays(arrays: &[&dyn ArrowArray]) -> arrow2::error::Result
-pub fn filter_array<A: ArrowArray + Clone>(array: &A, filter: &ArrowBooleanArray) -> A {
+pub fn filter_array<A: Arrow2Array + Clone>(array: &A, filter: &Arrow2BooleanArray) -> A {
     assert_eq!(
         array.len(),
         filter.len(),
         "the length of the filter must match the length of the array (the underlying kernel will panic otherwise)",
@@ -388,9 +393,9 @@ pub fn take_array(
 // That is not possible with vanilla `ListArray`s since they don't expose any way to encode optional lengths,
 // in addition to offsets.
 // For internal stuff, we could perhaps provide a custom implementation that returns a `DictionaryArray` instead?
-pub fn take_array<A: ArrowArray + Clone, O: arrow2::types::Index>(
+pub fn take_array<A: Arrow2Array + Clone, O: arrow2::types::Index>(
     array: &A,
-    indices: &ArrowPrimitiveArray<O>,
+    indices: &Arrow2PrimitiveArray<O>,
 ) -> A {
     debug_assert!(
         indices.validity().is_none(),
@@ -433,7 +438,7 @@ pub fn take_array(
 
 // ---
 
-use arrow2::{chunk::Chunk as ArrowChunk, datatypes::Schema as ArrowSchema};
+use arrow2::{chunk::Chunk as Arrow2Chunk, datatypes::Schema as Arrow2Schema};
 
 /// Concatenate multiple [`TransportChunk`]s into one.
 ///
 /// * `arrow2` doesn't have a `RecordBatch` type, therefore we emulate that using our `TransportChunk`s.
 /// * `arrow-rs` does have one, and it natively supports concatenation.
 pub fn concatenate_record_batches(
-    schema: ArrowSchema,
+    schema: Arrow2Schema,
     batches: &[TransportChunk],
 ) -> anyhow::Result<TransportChunk> {
     assert!(batches.iter().map(|batch| &batch.schema).all_equal());
@@ -453,7 +458,7 @@ pub fn concatenate_record_batches(
         let array = concat_arrays(
             &batches
                 .iter()
-                .map(|batch| &*batch.data[i] as &dyn ArrowArray)
+                .map(|batch| &*batch.data[i] as &dyn Arrow2Array)
                 .collect_vec(),
         )?;
         arrays.push(array);
@@ -462,6 +467,6 @@ pub fn concatenate_record_batches(
 
     Ok(TransportChunk {
         schema,
-        data: ArrowChunk::new(arrays),
+        data: Arrow2Chunk::new(arrays),
     })
 }
diff --git a/crates/store/re_chunk/tests/latest_at.rs b/crates/store/re_chunk/tests/latest_at.rs
index 96ebe4c7bba0..5525a6fd8116 100644
--- a/crates/store/re_chunk/tests/latest_at.rs
+++ b/crates/store/re_chunk/tests/latest_at.rs
@@ -1,4 +1,4 @@
-use arrow2::datatypes::DataType as ArrowDatatype;
+use arrow2::datatypes::DataType as Arrow2Datatype;
 use nohash_hasher::IntMap;
 
 use re_chunk::{Chunk, ComponentName, LatestAtQuery, RowId, TimePoint, Timeline};
@@ -9,7 +9,7 @@ use re_types_core::{Component, Loggable};
 
 const ENTITY_PATH: &str = "my/entity";
 
-fn datatypes() -> IntMap<ComponentName, ArrowDatatype> {
+fn datatypes() -> IntMap<ComponentName, Arrow2Datatype> {
     [
         (MyPoint::name(), MyPoint::arrow2_datatype()),
         (MyColor::name(), MyColor::arrow2_datatype()),
diff --git a/crates/store/re_chunk/tests/memory_test.rs b/crates/store/re_chunk/tests/memory_test.rs
index 53cab3fbf2e7..a6c0e7c6df7c 100644
--- a/crates/store/re_chunk/tests/memory_test.rs
+++ b/crates/store/re_chunk/tests/memory_test.rs
@@ -56,10 +56,10 @@ fn memory_use(run: impl Fn() -> R) -> (R, usize) {
 
 use arrow2::{
     array::{
-        Array as ArrowArray, BooleanArray as ArrowBooleanArray, ListArray as ArrowListArray,
-        PrimitiveArray as ArrowPrimitiveArray,
+        Array as Arrow2Array, BooleanArray as Arrow2BooleanArray, ListArray as ArrowListArray,
+        PrimitiveArray as Arrow2PrimitiveArray,
     },
-    offset::Offsets as ArrowOffsets,
+    offset::Offsets as Arrow2Offsets,
 };
 use itertools::Itertools;
 
@@ -79,14 +79,14 @@ fn concat_does_allocate() {
         std::iter::repeat(NUM_SCALARS as usize / 10)
             .take(10)
             .map(|_| {
-                ArrowPrimitiveArray::from_vec((0..NUM_SCALARS / 10).collect_vec()).to_boxed()
+                Arrow2PrimitiveArray::from_vec((0..NUM_SCALARS / 10).collect_vec()).to_boxed()
             })
             .collect_vec()
     });
     let unconcatenated_refs = unconcatenated
         .0
         .iter()
-        .map(|a| &**a as &dyn ArrowArray)
+        .map(|a| &**a as &dyn Arrow2Array)
         .collect_vec();
 
     let concatenated =
@@ -117,8 +117,9 @@ fn concat_single_is_noop() {
         ((unconcatenated, unconcatenated_size_bytes), (concatenated, concatenated_size_bytes)),
         total_size_bytes,
     ) = memory_use(|| {
-        let unconcatenated =
-            memory_use(|| ArrowPrimitiveArray::from_vec((0..NUM_SCALARS).collect_vec()).to_boxed());
+        let unconcatenated = memory_use(|| {
+            Arrow2PrimitiveArray::from_vec((0..NUM_SCALARS).collect_vec()).to_boxed()
+        });
 
         let concatenated =
             memory_use(|| re_chunk::util::concat_arrays(&[&*unconcatenated.0]).unwrap());
 
         (unconcatenated, concatenated)
@@ -140,11 +141,11 @@ fn concat_single_is_noop() {
     {
         let unconcatenated = unconcatenated
             .as_any()
-            .downcast_ref::<ArrowPrimitiveArray<i64>>()
+            .downcast_ref::<Arrow2PrimitiveArray<i64>>()
             .unwrap();
         let concatenated = concatenated
             .as_any()
-            .downcast_ref::<ArrowPrimitiveArray<i64>>()
+            .downcast_ref::<Arrow2PrimitiveArray<i64>>()
             .unwrap();
 
         assert!(
@@ -168,10 +169,10 @@ fn filter_does_allocate() {
     let (((unfiltered, unfiltered_size_bytes), (filtered, filtered_size_bytes)), total_size_bytes) =
         memory_use(|| {
             let unfiltered = memory_use(|| {
-                let scalars = ArrowPrimitiveArray::from_vec((0..NUM_SCALARS).collect_vec());
+                let scalars = Arrow2PrimitiveArray::from_vec((0..NUM_SCALARS).collect_vec());
                 ArrowListArray::<i32>::new(
                     ArrowListArray::<i32>::default_datatype(scalars.data_type().clone()),
-                    ArrowOffsets::try_from_lengths(
+                    Arrow2Offsets::try_from_lengths(
                         std::iter::repeat(NUM_SCALARS as usize / 10).take(10),
                     )
                     .unwrap()
                     .into(),
                 )
             });
 
-            let filter = ArrowBooleanArray::from_slice(
+            let filter = Arrow2BooleanArray::from_slice(
                 (0..unfiltered.0.len()).map(|i| i % 2 == 0).collect_vec(),
             );
             let filtered = memory_use(|| re_chunk::util::filter_array(&unfiltered.0, &filter));
@@ -203,12 +204,12 @@ fn filter_does_allocate() {
         let unfiltered = unfiltered
             .values()
             .as_any()
-            .downcast_ref::<ArrowPrimitiveArray<i64>>()
+            .downcast_ref::<Arrow2PrimitiveArray<i64>>()
             .unwrap();
         let filtered = filtered
             .values()
             .as_any()
-            .downcast_ref::<ArrowPrimitiveArray<i64>>()
+            .downcast_ref::<Arrow2PrimitiveArray<i64>>()
             .unwrap();
 
         assert!(
@@ -230,10 +231,10 @@ fn filter_empty_or_full_is_noop() {
     let (((unfiltered, unfiltered_size_bytes), (filtered, filtered_size_bytes)), total_size_bytes) =
         memory_use(|| {
             let unfiltered = memory_use(|| {
-                let scalars = ArrowPrimitiveArray::from_vec((0..NUM_SCALARS).collect_vec());
+                let scalars = Arrow2PrimitiveArray::from_vec((0..NUM_SCALARS).collect_vec());
                 ArrowListArray::<i32>::new(
                     ArrowListArray::<i32>::default_datatype(scalars.data_type().clone()),
-                    ArrowOffsets::try_from_lengths(
+                    Arrow2Offsets::try_from_lengths(
                         std::iter::repeat(NUM_SCALARS as usize / 10).take(10),
                     )
                     .unwrap()
                     .into(),
                 )
             });
 
-            let filter = ArrowBooleanArray::from_slice(
+            let filter = Arrow2BooleanArray::from_slice(
                 std::iter::repeat(true)
                     .take(unfiltered.0.len())
                     .collect_vec(),
             );
@@ -269,12 +270,12 @@ fn filter_empty_or_full_is_noop() {
         let unfiltered = unfiltered
             .values()
             .as_any()
-            .downcast_ref::<ArrowPrimitiveArray<i64>>()
+            .downcast_ref::<Arrow2PrimitiveArray<i64>>()
             .unwrap();
         let filtered = filtered
             .values()
             .as_any()
-            .downcast_ref::<ArrowPrimitiveArray<i64>>()
+            .downcast_ref::<Arrow2PrimitiveArray<i64>>()
             .unwrap();
 
         assert!(
@@ -301,10 +302,10 @@ fn take_does_not_allocate() {
     let (((untaken, untaken_size_bytes), (taken, taken_size_bytes)), total_size_bytes) =
         memory_use(|| {
             let untaken = memory_use(|| {
-                let scalars = ArrowPrimitiveArray::from_vec((0..NUM_SCALARS).collect_vec());
+                let scalars = Arrow2PrimitiveArray::from_vec((0..NUM_SCALARS).collect_vec());
                 ArrowListArray::<i32>::new(
                     ArrowListArray::<i32>::default_datatype(scalars.data_type().clone()),
-                    ArrowOffsets::try_from_lengths(
+                    Arrow2Offsets::try_from_lengths(
                         std::iter::repeat(NUM_SCALARS as usize / 10).take(10),
                     )
                     .unwrap()
                     .into(),
                 )
             });
 
-            let indices = ArrowPrimitiveArray::from_vec(
+            let indices = Arrow2PrimitiveArray::from_vec(
                 (0..untaken.0.len() as i32)
                     .filter(|i| i % 2 == 0)
                     .collect_vec(),
@@ -338,12 +339,12 @@ fn take_does_not_allocate() {
         let untaken = untaken
             .values()
             .as_any()
-            .downcast_ref::<ArrowPrimitiveArray<i64>>()
+            .downcast_ref::<Arrow2PrimitiveArray<i64>>()
             .unwrap();
         let taken = taken
             .values()
             .as_any()
-            .downcast_ref::<ArrowPrimitiveArray<i64>>()
+            .downcast_ref::<Arrow2PrimitiveArray<i64>>()
             .unwrap();
 
         assert!(
@@ -365,10 +366,10 @@ fn take_empty_or_full_is_noop() {
     let (((untaken, untaken_size_bytes), (taken, taken_size_bytes)), total_size_bytes) =
         memory_use(|| {
             let untaken = memory_use(|| {
-                let scalars = ArrowPrimitiveArray::from_vec((0..NUM_SCALARS).collect_vec());
+                let scalars = Arrow2PrimitiveArray::from_vec((0..NUM_SCALARS).collect_vec());
                 ArrowListArray::<i32>::new(
                     ArrowListArray::<i32>::default_datatype(scalars.data_type().clone()),
-                    ArrowOffsets::try_from_lengths(
+                    Arrow2Offsets::try_from_lengths(
                         std::iter::repeat(NUM_SCALARS as usize / 10).take(10),
                     )
                     .unwrap()
                     .into(),
                 )
             });
 
-            let indices = ArrowPrimitiveArray::from_vec((0..untaken.0.len() as i32).collect_vec());
+            let indices = Arrow2PrimitiveArray::from_vec((0..untaken.0.len() as i32).collect_vec());
             let taken = memory_use(|| re_chunk::util::take_array(&untaken.0, &indices));
 
             (untaken, taken)
         })
@@ -400,12 +401,12 @@ fn take_empty_or_full_is_noop() {
         let untaken = untaken
             .values()
             .as_any()
-            .downcast_ref::<ArrowPrimitiveArray<i64>>()
+            .downcast_ref::<Arrow2PrimitiveArray<i64>>()
             .unwrap();
         let taken = taken
             .values()
             .as_any()
-            .downcast_ref::<ArrowPrimitiveArray<i64>>()
+            .downcast_ref::<Arrow2PrimitiveArray<i64>>()
             .unwrap();
 
         assert!(
diff --git a/crates/store/re_chunk/tests/range.rs b/crates/store/re_chunk/tests/range.rs
index c94c7a0fc92e..996a27d473c2 100644
--- a/crates/store/re_chunk/tests/range.rs
+++ b/crates/store/re_chunk/tests/range.rs
@@ -1,4 +1,4 @@
-use arrow2::datatypes::DataType as ArrowDatatype;
+use arrow2::datatypes::DataType as Arrow2Datatype;
 use nohash_hasher::IntMap;
 
 use re_chunk::{Chunk, ComponentName, RangeQuery, RowId, TimePoint, Timeline};
@@ -12,7 +12,7 @@ use re_types_core::{Component as _, Loggable as _};
 
 const ENTITY_PATH: &str = "my/entity";
 
-fn datatypes() -> IntMap<ComponentName, ArrowDatatype> {
+fn datatypes() -> IntMap<ComponentName, Arrow2Datatype> {
     [
         (MyPoint::name(), MyPoint::arrow2_datatype()),
         (MyColor::name(), MyColor::arrow2_datatype()),
diff --git a/crates/store/re_chunk_store/src/dataframe.rs b/crates/store/re_chunk_store/src/dataframe.rs
index fc63f2d5f82d..07aae3bb3963 100644
--- a/crates/store/re_chunk_store/src/dataframe.rs
+++ b/crates/store/re_chunk_store/src/dataframe.rs
@@ -4,7 +4,7 @@ use std::collections::{BTreeMap, BTreeSet};
 
 use arrow2::{
     array::ListArray as ArrowListArray,
-    datatypes::{DataType as ArrowDatatype, Field as ArrowField},
+    datatypes::{DataType as Arrow2Datatype, Field as Arrow2Field},
 };
 
 use re_chunk::TimelineName;
@@ -40,7 +40,7 @@ impl ColumnDescriptor {
     }
 
     #[inline]
-    pub fn datatype(&self) -> ArrowDatatype {
+    pub fn datatype(&self) -> Arrow2Datatype {
         match self {
             Self::Time(descr) => descr.datatype.clone(),
             Self::Component(descr) => descr.returned_datatype(),
         }
     }
 
     #[inline]
-    pub fn to_arrow_field(&self) -> ArrowField {
+    pub fn to_arrow_field(&self) -> Arrow2Field {
         match self {
             Self::Time(descr) => descr.to_arrow_field(),
             Self::Component(descr) => descr.to_arrow_field(),
         }
     }
@@ -79,7 +79,7 @@ pub struct TimeColumnDescriptor {
     pub timeline: Timeline,
 
     /// The Arrow datatype of the column.
-    pub datatype: ArrowDatatype,
+    pub datatype: Arrow2Datatype,
 }
 
 impl PartialOrd for TimeColumnDescriptor {
@@ -103,9 +103,9 @@ impl Ord for TimeColumnDescriptor {
 impl TimeColumnDescriptor {
     #[inline]
     // Time column must be nullable since static data doesn't have a time.
-    pub fn to_arrow_field(&self) -> ArrowField {
+    pub fn to_arrow_field(&self) -> Arrow2Field {
         let Self { timeline, datatype } = self;
-        ArrowField::new(
+        Arrow2Field::new(
             timeline.name().to_string(),
             datatype.clone(),
             true, /* nullable */
@@ -149,7 +149,7 @@ pub struct ComponentColumnDescriptor {
     /// This is the log-time datatype corresponding to how this data is encoded
     /// in a chunk. Currently this will always be an [`ArrowListArray`], but as
     /// we introduce mono-type optimization, this might be a native type instead.
-    pub store_datatype: ArrowDatatype,
+    pub store_datatype: Arrow2Datatype,
 
     /// Whether this column represents static data.
     pub is_static: bool,
@@ -289,13 +289,13 @@ impl ComponentColumnDescriptor {
     }
 
     #[inline]
-    pub fn returned_datatype(&self) -> ArrowDatatype {
+    pub fn returned_datatype(&self) -> Arrow2Datatype {
         self.store_datatype.clone()
     }
 
     #[inline]
-    pub fn to_arrow_field(&self) -> ArrowField {
-        ArrowField::new(
+    pub fn to_arrow_field(&self) -> Arrow2Field {
+        Arrow2Field::new(
             format!(
                 "{}:{}",
                 self.entity_path,
@@ -752,7 +752,7 @@ impl ChunkStore {
         let datatype = self
             .lookup_datatype(&component_name)
             .cloned()
-            .unwrap_or_else(|| ArrowDatatype::Null);
+            .unwrap_or_else(|| Arrow2Datatype::Null);
 
         ComponentColumnDescriptor {
             entity_path: selector.entity_path.clone(),
diff --git a/crates/store/re_chunk_store/src/store.rs b/crates/store/re_chunk_store/src/store.rs
index 10d0b47c55f4..20a34fbce2b8 100644
--- a/crates/store/re_chunk_store/src/store.rs
+++ b/crates/store/re_chunk_store/src/store.rs
@@ -2,7 +2,7 @@ use std::collections::{BTreeMap, BTreeSet};
 use std::sync::atomic::AtomicU64;
 use std::sync::Arc;
 
-use arrow2::datatypes::DataType as ArrowDataType;
+use arrow2::datatypes::DataType as Arrow2DataType;
 use nohash_hasher::IntMap;
 
 use re_chunk::{Chunk, ChunkId, RowId, TransportChunk};
@@ -407,7 +407,7 @@ pub struct ChunkStore {
     //
     // TODO(cmc): this would become fairly problematic in a world where each chunk can use a
     // different datatype for a given component.
-    pub(crate) type_registry: IntMap<ComponentName, ArrowDataType>,
+    pub(crate) type_registry: IntMap<ComponentName, Arrow2DataType>,
 
     pub(crate) per_column_metadata:
         BTreeMap<EntityPath, BTreeMap<ComponentName, ColumnMetadataState>>,
 
@@ -633,9 +633,9 @@ impl ChunkStore {
         self.chunks_per_chunk_id.len()
     }
 
-    /// Lookup the _latest_ arrow [`ArrowDataType`] used by a specific [`re_types_core::Component`].
+    /// Lookup the _latest_ arrow [`Arrow2DataType`] used by a specific [`re_types_core::Component`].
     #[inline]
-    pub fn lookup_datatype(&self, component_name: &ComponentName) -> Option<&ArrowDataType> {
+    pub fn lookup_datatype(&self, component_name: &ComponentName) -> Option<&Arrow2DataType> {
         self.type_registry.get(component_name)
     }
 
diff --git a/crates/store/re_chunk_store/src/writes.rs b/crates/store/re_chunk_store/src/writes.rs
index 2f168e09545e..1d64945306d9 100644
--- a/crates/store/re_chunk_store/src/writes.rs
+++ b/crates/store/re_chunk_store/src/writes.rs
@@ -1,7 +1,7 @@
 use std::{collections::BTreeSet, sync::Arc};
 
 use ahash::HashMap;
-use arrow2::array::{Array as _, ListArray as ArrowListArray};
+use arrow2::array::{Array as _, ListArray as Arrow2ListArray};
 use itertools::Itertools as _;
 
 use re_chunk::{Chunk, EntityPath, RowId};
@@ -339,7 +339,7 @@ impl ChunkStore {
         for (&component_name, list_array) in chunk.components() {
             self.type_registry.insert(
                 component_name,
-                ArrowListArray::<i32>::get_child_type(list_array.data_type()).clone(),
+                Arrow2ListArray::<i32>::get_child_type(list_array.data_type()).clone(),
             );
 
             let column_metadata_state = self
diff --git a/crates/store/re_chunk_store/tests/gc.rs b/crates/store/re_chunk_store/tests/gc.rs
index 9e1bd7d0463f..cc3e0601b170 100644
--- a/crates/store/re_chunk_store/tests/gc.rs
+++ b/crates/store/re_chunk_store/tests/gc.rs
@@ -1,6 +1,6 @@
 use std::sync::Arc;
 
-use arrow2::array::Array as ArrowArray;
+use arrow2::array::Array as Arrow2Array;
 use rand::Rng as _;
 
 use re_chunk::{Chunk, ChunkId, ComponentName, LatestAtQuery, RowId, TimeInt, TimePoint};
@@ -23,7 +23,7 @@ fn query_latest_array(
     entity_path: &EntityPath,
     component_name: ComponentName,
     query: &LatestAtQuery,
-) -> Option<(TimeInt, RowId, Box<dyn ArrowArray>)> {
+) -> Option<(TimeInt, RowId, Box<dyn Arrow2Array>)> {
     re_tracing::profile_function!();
 
     let ((data_time, row_id), unit) = store
diff --git a/crates/store/re_chunk_store/tests/reads.rs b/crates/store/re_chunk_store/tests/reads.rs
index e94eb426183e..d8c6370bfc2e 100644
--- a/crates/store/re_chunk_store/tests/reads.rs
+++ b/crates/store/re_chunk_store/tests/reads.rs
@@ -1,6 +1,6 @@
 use std::sync::Arc;
 
-use arrow2::array::Array as ArrowArray;
+use arrow2::array::Array as Arrow2Array;
 use itertools::Itertools;
 
 use re_chunk::{Chunk, ChunkId, RowId, TimePoint};
@@ -23,7 +23,7 @@ fn query_latest_array(
     entity_path: &EntityPath,
     component_name: ComponentName,
     query: &LatestAtQuery,
-) -> Option<(TimeInt, RowId, Box<dyn ArrowArray>)> {
+) -> Option<(TimeInt, RowId, Box<dyn Arrow2Array>)> {
     re_tracing::profile_function!();
 
     let ((data_time, row_id), unit) = store
diff --git a/crates/store/re_data_loader/src/loader_archetype.rs b/crates/store/re_data_loader/src/loader_archetype.rs
index 041541823a08..bbfc57f1cc47 100644
--- a/crates/store/re_data_loader/src/loader_archetype.rs
+++ b/crates/store/re_data_loader/src/loader_archetype.rs
@@ -5,7 +5,7 @@ use re_types::components::VideoTimestamp;
 use re_types::Archetype;
 use re_types::{components::MediaType, ComponentBatch};
 
-use arrow2::array::PrimitiveArray as ArrowPrimitiveArray;
+use arrow2::array::PrimitiveArray as Arrow2PrimitiveArray;
 use arrow2::Either;
 
 use crate::{DataLoader, DataLoaderError, LoadedData};
@@ -193,7 +193,7 @@ fn load_video(
         Ok(frame_timestamps_ns) => {
             // Time column.
             let is_sorted = Some(true);
-            let time_column_times = ArrowPrimitiveArray::from_slice(&frame_timestamps_ns);
+            let time_column_times = Arrow2PrimitiveArray::from_slice(&frame_timestamps_ns);
             let time_column =
                 re_chunk::TimeColumn::new(is_sorted, video_timeline, time_column_times);
 
diff --git a/crates/store/re_dataframe/src/lib.rs b/crates/store/re_dataframe/src/lib.rs
index dcf92a27e4d4..40702bc590ac 100644
--- a/crates/store/re_dataframe/src/lib.rs
+++ b/crates/store/re_dataframe/src/lib.rs
@@ -7,7 +7,7 @@ pub use self::engine::{QueryEngine, RecordBatch};
 pub use self::query::QueryHandle;
 
 #[doc(no_inline)]
-pub use self::external::arrow2::chunk::Chunk as ArrowChunk;
+pub use self::external::arrow2::chunk::Chunk as Arrow2Chunk;
 #[doc(no_inline)]
 pub use self::external::re_chunk::{util::concatenate_record_batches, TransportChunk};
 #[doc(no_inline)]
diff --git a/crates/store/re_dataframe/src/query.rs b/crates/store/re_dataframe/src/query.rs
index ae0d24f26dbd..0312ff3029d5 100644
--- a/crates/store/re_dataframe/src/query.rs
+++ b/crates/store/re_dataframe/src/query.rs
@@ -8,11 +8,11 @@ use std::{
 
 use arrow2::{
     array::{
-        Array as ArrowArray, BooleanArray as ArrowBooleanArray,
-        PrimitiveArray as ArrowPrimitiveArray,
+        Array as Arrow2Array, BooleanArray as Arrow2BooleanArray,
+        PrimitiveArray as Arrow2PrimitiveArray,
     },
-    chunk::Chunk as ArrowChunk,
-    datatypes::Schema as ArrowSchema,
+    chunk::Chunk as Arrow2Chunk,
+    datatypes::Schema as Arrow2Schema,
     Either,
 };
 use itertools::Itertools;
@@ -104,7 +104,7 @@ struct QueryHandleState {
     /// The Arrow schema that corresponds to the `selected_contents`.
     ///
     /// All returned rows will have this schema.
-    arrow_schema: ArrowSchema,
+    arrow_schema: Arrow2Schema,
 
     /// All the [`Chunk`]s included in the view contents.
     ///
@@ -185,7 +185,7 @@ impl QueryHandle {
         // 3. Compute the Arrow schema of the selected components.
         //
         // Every result returned using this `QueryHandle` will match this schema exactly.
-        let arrow_schema = ArrowSchema {
+        let arrow_schema = Arrow2Schema {
             fields: selected_contents
                 .iter()
                 .map(|(_, descr)| descr.to_arrow_field())
@@ -518,9 +518,9 @@ impl QueryHandle {
                     let values = list_array
                         .values()
                         .as_any()
-                        .downcast_ref::<ArrowPrimitiveArray<i64>>()?;
+                        .downcast_ref::<Arrow2PrimitiveArray<i64>>()?;
 
-                    let indices = ArrowPrimitiveArray::from_vec(
+                    let indices = Arrow2PrimitiveArray::from_vec(
                         values
                             .iter()
                             .enumerate()
@@ -668,7 +668,7 @@ impl QueryHandle {
     ///
     /// Columns that do not yield any data will still be present in the results, filled with null values.
     #[inline]
-    pub fn schema(&self) -> &ArrowSchema {
+    pub fn schema(&self) -> &Arrow2Schema {
         &self.init().arrow_schema
     }
 
@@ -791,7 +791,7 @@ impl QueryHandle {
     /// }
     /// ```
     #[inline]
-    pub fn next_row(&self) -> Option<Vec<Box<dyn ArrowArray>>> {
+    pub fn next_row(&self) -> Option<Vec<Box<dyn Arrow2Array>>> {
         self.engine
             .with(|store, cache| self._next_row(store, cache))
     }
@@ -814,7 +814,7 @@ impl QueryHandle {
     #[cfg(not(target_arch = "wasm32"))]
     pub fn next_row_async(
         &self,
-    ) -> impl std::future::Future<Output = Option<Vec<Box<dyn ArrowArray>>>>
+    ) -> impl std::future::Future<Output = Option<Vec<Box<dyn Arrow2Array>>>>
     where
         E: 'static + Send + Clone,
     {
@@ -853,7 +853,7 @@ impl QueryHandle {
         &self,
         store: &ChunkStore,
         cache: &QueryCache,
-    ) -> Option<Vec<Box<dyn ArrowArray>>> {
+    ) -> Option<Vec<Box<dyn Arrow2Array>>> {
         re_tracing::profile_function!();
 
         /// Temporary state used to resolve the streaming join for the current iteration.
@@ -1137,7 +1137,7 @@ impl QueryHandle { state.filtered_index, ( *cur_index_value, - ArrowPrimitiveArray::::from_vec(vec![cur_index_value.as_i64()]) + Arrow2PrimitiveArray::::from_vec(vec![cur_index_value.as_i64()]) .to(state.filtered_index.datatype()) .to_boxed(), ), @@ -1227,7 +1227,7 @@ impl QueryHandle { pub fn next_row_batch(&self) -> Option { Some(RecordBatch { schema: self.schema().clone(), - data: ArrowChunk::new(self.next_row()?), + data: Arrow2Chunk::new(self.next_row()?), }) } @@ -1245,7 +1245,7 @@ impl QueryHandle { Some(RecordBatch { schema, - data: ArrowChunk::new(row), + data: Arrow2Chunk::new(row), }) } } @@ -1253,13 +1253,13 @@ impl QueryHandle { impl QueryHandle { /// Returns an iterator backed by [`Self::next_row`]. #[allow(clippy::should_implement_trait)] // we need an anonymous closure, this won't work - pub fn iter(&self) -> impl Iterator>> + '_ { + pub fn iter(&self) -> impl Iterator>> + '_ { std::iter::from_fn(move || self.next_row()) } /// Returns an iterator backed by [`Self::next_row`]. #[allow(clippy::should_implement_trait)] // we need an anonymous closure, this won't work - pub fn into_iter(self) -> impl Iterator>> { + pub fn into_iter(self) -> impl Iterator>> { std::iter::from_fn(move || self.next_row()) } diff --git a/crates/store/re_log_types/src/arrow_msg.rs b/crates/store/re_log_types/src/arrow_msg.rs index 09fdc9eeac7b..d680ec5aa2b0 100644 --- a/crates/store/re_log_types/src/arrow_msg.rs +++ b/crates/store/re_log_types/src/arrow_msg.rs @@ -1,17 +1,17 @@ //! [`ArrowMsg`] is the [`crate::LogMsg`] sub-type containing an Arrow payload. //! //! We have custom implementations of [`serde::Serialize`] and [`serde::Deserialize`] that wraps -//! the inner Arrow serialization of [`ArrowSchema`] and [`ArrowChunk`]. +//! the inner Arrow serialization of [`Arrow2Schema`] and [`Arrow2Chunk`]. use std::sync::Arc; use crate::TimePoint; use arrow2::{ - array::Array as ArrowArray, chunk::Chunk as ArrowChunk, datatypes::Schema as ArrowSchema, + array::Array as Arrow2Array, chunk::Chunk as Arrow2Chunk, datatypes::Schema as Arrow2Schema, }; /// An arbitrary callback to be run when an [`ArrowMsg`], and more specifically the -/// [`ArrowChunk`] within it, goes out of scope. +/// [`Arrow2Chunk`] within it, goes out of scope. /// /// If the [`ArrowMsg`] has been cloned in a bunch of places, the callback will run for each and /// every instance. @@ -20,10 +20,10 @@ use arrow2::{ // TODO(#6412): probably don't need this anymore. #[allow(clippy::type_complexity)] #[derive(Clone)] -pub struct ArrowChunkReleaseCallback(Arc>) + Send + Sync>); +pub struct ArrowChunkReleaseCallback(Arc>) + Send + Sync>); impl std::ops::Deref for ArrowChunkReleaseCallback { - type Target = dyn Fn(ArrowChunk>) + Send + Sync; + type Target = dyn Fn(Arrow2Chunk>) + Send + Sync; #[inline] fn deref(&self) -> &Self::Target { @@ -33,7 +33,7 @@ impl std::ops::Deref for ArrowChunkReleaseCallback { impl From for ArrowChunkReleaseCallback where - F: Fn(ArrowChunk>) + Send + Sync + 'static, + F: Fn(Arrow2Chunk>) + Send + Sync + 'static, { #[inline] fn from(f: F) -> Self { @@ -80,10 +80,10 @@ pub struct ArrowMsg { pub timepoint_max: TimePoint, /// Schema for all control & data columns. - pub schema: ArrowSchema, + pub schema: Arrow2Schema, /// Data for all control & data columns. 
- pub chunk: ArrowChunk>, + pub chunk: Arrow2Chunk>, // pub on_release: Option>, pub on_release: Option, @@ -183,7 +183,7 @@ impl<'de> serde::Deserialize<'de> for ArrowMsg { .map_err(|err| serde::de::Error::custom(format!("Arrow error: {err}")))?; if chunks.is_empty() { - return Err(serde::de::Error::custom("No ArrowChunk found in stream")); + return Err(serde::de::Error::custom("No Arrow2Chunk found in stream")); } if chunks.len() > 1 { return Err(serde::de::Error::custom(format!( diff --git a/crates/store/re_protos/src/codec.rs b/crates/store/re_protos/src/codec.rs index 9b7317d7e92d..375722bfac8d 100644 --- a/crates/store/re_protos/src/codec.rs +++ b/crates/store/re_protos/src/codec.rs @@ -1,7 +1,7 @@ -use arrow2::array::Array as ArrowArray; -use arrow2::chunk::Chunk as ArrowChunk; -use arrow2::datatypes::Schema as ArrowSchema; -use arrow2::error::Error as ArrowError; +use arrow2::array::Array as Arrow2Array; +use arrow2::chunk::Chunk as Arrow2Chunk; +use arrow2::datatypes::Schema as Arrow2Schema; +use arrow2::error::Error as Arrow2Error; use arrow2::io::ipc::{read, write}; use re_dataframe::TransportChunk; @@ -10,7 +10,7 @@ use crate::v0::{EncoderVersion, RecordingMetadata}; #[derive(Debug, thiserror::Error)] pub enum CodecError { #[error("Arrow serialization error: {0}")] - ArrowSerialization(ArrowError), + ArrowSerialization(Arrow2Error), #[error("Failed to decode message header {0}")] HeaderDecoding(std::io::Error), @@ -191,11 +191,11 @@ impl RecordingMetadata { .position(|field| field.name == "id") .ok_or_else(|| CodecError::InvalidArgument("missing id field in schema".to_owned()))?; - use arrow2::array::Utf8Array as ArrowUtf8Array; + use arrow2::array::Utf8Array as Arrow2Utf8Array; let id = metadata.data.columns()[id_pos] .as_any() - .downcast_ref::>() + .downcast_ref::>() .ok_or_else(|| { CodecError::InvalidArgument(format!( "Unexpected type for id with position {id_pos} in schema: {:?}", @@ -215,8 +215,8 @@ impl RecordingMetadata { /// using Arrow IPC format. fn write_arrow_to_bytes( writer: &mut W, - schema: &ArrowSchema, - data: &ArrowChunk>, + schema: &Arrow2Schema, + data: &Arrow2Chunk>, ) -> Result<(), CodecError> { let options = write::WriteOptions { compression: None }; let mut sw = write::StreamWriter::new(writer, options); @@ -234,7 +234,7 @@ fn write_arrow_to_bytes( /// using Arrow IPC format. 
 fn read_arrow_from_bytes<R: std::io::Read>(
     reader: &mut R,
-) -> Result<(ArrowSchema, ArrowChunk<Box<dyn ArrowArray>>), CodecError> {
+) -> Result<(Arrow2Schema, Arrow2Chunk<Box<dyn Arrow2Array>>), CodecError> {
     let metadata = read::read_stream_metadata(reader).map_err(CodecError::ArrowSerialization)?;
     let mut stream = read::StreamReader::new(reader, metadata, None);
 
@@ -253,12 +253,11 @@
 
 #[cfg(test)]
 mod tests {
-
-    use arrow2::array::Utf8Array as ArrowUtf8Array;
-    use arrow2::chunk::Chunk as ArrowChunk;
+    use arrow2::array::Utf8Array as Arrow2Utf8Array;
+    use arrow2::chunk::Chunk as Arrow2Chunk;
     use arrow2::{
-        array::Int32Array as ArrowInt32Array, datatypes::Field as ArrowField,
-        datatypes::Schema as ArrowSchema,
+        array::Int32Array as Arrow2Int32Array, datatypes::Field as Arrow2Field,
+        datatypes::Schema as Arrow2Schema,
     };
     use re_dataframe::external::re_chunk::{Chunk, RowId};
     use re_dataframe::TransportChunk;
@@ -360,14 +359,14 @@ mod tests {
 
     #[test]
     fn test_recording_metadata_serialization() {
-        let expected_schema = ArrowSchema::from(vec![
-            ArrowField::new("id", arrow2::datatypes::DataType::Utf8, false),
-            ArrowField::new("my_int", arrow2::datatypes::DataType::Int32, false),
+        let expected_schema = Arrow2Schema::from(vec![
+            Arrow2Field::new("id", arrow2::datatypes::DataType::Utf8, false),
+            Arrow2Field::new("my_int", arrow2::datatypes::DataType::Int32, false),
         ]);
-        let id = ArrowUtf8Array::<i32>::from_slice(["some_id"]);
-        let my_ints = ArrowInt32Array::from_slice([42]);
-        let expected_chunk = ArrowChunk::new(vec![Box::new(id) as _, Box::new(my_ints) as _]);
+        let id = Arrow2Utf8Array::<i32>::from_slice(["some_id"]);
+        let my_ints = Arrow2Int32Array::from_slice([42]);
+        let expected_chunk = Arrow2Chunk::new(vec![Box::new(id) as _, Box::new(my_ints) as _]);
 
         let metadata_tc = TransportChunk {
             schema: expected_schema.clone(),
             data: expected_chunk.clone(),
@@ -387,15 +386,15 @@ mod tests {
 
     #[test]
     fn test_recording_metadata_fails_with_non_unit_batch() {
-        let expected_schema = ArrowSchema::from(vec![ArrowField::new(
+        let expected_schema = Arrow2Schema::from(vec![Arrow2Field::new(
             "my_int",
             arrow2::datatypes::DataType::Int32,
             false,
         )]);
         // more than 1 row in the batch
-        let my_ints = ArrowInt32Array::from_slice([41, 42]);
+        let my_ints = Arrow2Int32Array::from_slice([41, 42]);
 
-        let expected_chunk = ArrowChunk::new(vec![Box::new(my_ints) as _]);
+        let expected_chunk = Arrow2Chunk::new(vec![Box::new(my_ints) as _]);
 
         let metadata_tc = TransportChunk {
             schema: expected_schema.clone(),
             data: expected_chunk,
diff --git a/crates/store/re_query/examples/latest_at.rs b/crates/store/re_query/examples/latest_at.rs
index e096db0fb8af..362e9481ce7c 100644
--- a/crates/store/re_query/examples/latest_at.rs
+++ b/crates/store/re_query/examples/latest_at.rs
@@ -1,7 +1,7 @@
 use std::sync::Arc;
 
 use anyhow::Context;
-use arrow2::array::PrimitiveArray as ArrowPrimitiveArray;
+use arrow2::array::PrimitiveArray as Arrow2PrimitiveArray;
 use itertools::Itertools;
 
 use re_chunk::{Chunk, RowId};
@@ -78,7 +78,7 @@ fn main() -> anyhow::Result<()> {
             .context("invalid")?;
         let colors = colors
             .as_any()
-            .downcast_ref::<ArrowPrimitiveArray<u32>>()
+            .downcast_ref::<Arrow2PrimitiveArray<u32>>()
             .context("invalid")?;
         let colors = colors
             .values()
diff --git a/crates/store/re_query/src/latest_at.rs b/crates/store/re_query/src/latest_at.rs
index 90c65dce7031..8a9c5966022d 100644
--- a/crates/store/re_query/src/latest_at.rs
+++ b/crates/store/re_query/src/latest_at.rs
@@ -3,7 +3,7 @@ use std::{
     sync::Arc,
 };
 
-use arrow2::array::Array as ArrowArray;
+use arrow2::array::Array as Arrow2Array;
 use nohash_hasher::IntMap;
 use parking_lot::RwLock;
@@ -308,7 +308,7 @@ impl LatestAtResults {
     pub fn component_batch_raw(
         &self,
         component_name: &ComponentName,
-    ) -> Option<Box<dyn ArrowArray>> {
+    ) -> Option<Box<dyn Arrow2Array>> {
         self.components
             .get(component_name)
             .and_then(|unit| unit.component_batch_raw(component_name))
@@ -354,7 +354,7 @@ impl LatestAtResults {
         log_level: re_log::Level,
         component_name: &ComponentName,
         instance_index: usize,
-    ) -> Option<Box<dyn ArrowArray>> {
+    ) -> Option<Box<dyn Arrow2Array>> {
         self.components.get(component_name).and_then(|unit| {
             self.ok_or_log_err(
                 log_level,
@@ -372,7 +372,7 @@ impl LatestAtResults {
         &self,
         component_name: &ComponentName,
         instance_index: usize,
-    ) -> Option<Box<dyn ArrowArray>> {
+    ) -> Option<Box<dyn Arrow2Array>> {
         self.component_instance_raw_with_log_level(
             re_log::Level::Error,
             component_name,
@@ -386,7 +386,7 @@ impl LatestAtResults {
         &self,
         component_name: &ComponentName,
         instance_index: usize,
-    ) -> Option<Box<dyn ArrowArray>> {
+    ) -> Option<Box<dyn Arrow2Array>> {
         self.components.get(component_name).and_then(|unit| {
             unit.component_instance_raw(component_name, instance_index)?
                 .ok()
@@ -440,7 +440,7 @@ impl LatestAtResults {
         &self,
         log_level: re_log::Level,
         component_name: &ComponentName,
-    ) -> Option<Box<dyn ArrowArray>> {
+    ) -> Option<Box<dyn Arrow2Array>> {
         self.components.get(component_name).and_then(|unit| {
             self.ok_or_log_err(
                 log_level,
@@ -457,7 +457,7 @@ impl LatestAtResults {
     pub fn component_mono_raw(
         &self,
         component_name: &ComponentName,
-    ) -> Option<Box<dyn ArrowArray>> {
+    ) -> Option<Box<dyn Arrow2Array>> {
         self.component_mono_raw_with_log_level(re_log::Level::Error, component_name)
     }
 
@@ -468,7 +468,7 @@ impl LatestAtResults {
     pub fn component_mono_raw_quiet(
         &self,
         component_name: &ComponentName,
-    ) -> Option<Box<dyn ArrowArray>> {
+    ) -> Option<Box<dyn Arrow2Array>> {
         self.components
             .get(component_name)
             .and_then(|unit| unit.component_mono_raw(component_name)?.ok())
diff --git a/crates/store/re_types_core/Cargo.toml b/crates/store/re_types_core/Cargo.toml
index ecc433d31a6c..5be97ce1e10e 100644
--- a/crates/store/re_types_core/Cargo.toml
+++ b/crates/store/re_types_core/Cargo.toml
@@ -42,7 +42,9 @@ re_tuid.workspace = true
 
 # External
 anyhow.workspace = true
+arrow.workspace = true
 arrow2 = { workspace = true, features = [
+  "arrow",
   "io_ipc",
   "io_print",
   "compute_concatenate",
diff --git a/crates/store/re_types_core/src/arrow_buffer.rs b/crates/store/re_types_core/src/arrow_buffer.rs
index b35aaad9afde..f386770efa15 100644
--- a/crates/store/re_types_core/src/arrow_buffer.rs
+++ b/crates/store/re_types_core/src/arrow_buffer.rs
@@ -1,15 +1,13 @@
-use arrow2::buffer::Buffer;
-
-/// Convenience-wrapper around an arrow [`Buffer`] that is known to contain a
+/// Convenience-wrapper around an [`arrow2::buffer::Buffer`] that is known to contain a
 /// primitive type.
 ///
-/// The arrow2 [`Buffer`] object is internally reference-counted and can be
+/// The [`ArrowBuffer`] object is internally reference-counted and can be
 /// easily converted back to a `&[T]` referencing the underlying storage.
 /// This avoids some of the lifetime complexities that would otherwise
 /// arise from returning a `&[T]` directly, but is significantly more
 /// performant than doing the full allocation necessary to return a `Vec<T>`.
 #[derive(Clone, Debug, Default, PartialEq)]
-pub struct ArrowBuffer<T>(pub Buffer<T>);
+pub struct ArrowBuffer<T>(arrow2::buffer::Buffer<T>);
 
 impl<T> crate::SizeBytes for ArrowBuffer<T> {
     #[inline]
@@ -50,12 +48,7 @@ impl<T> ArrowBuffer<T> {
         self.0.as_slice()
     }
 
-    #[inline]
-    pub fn into_inner(self) -> Buffer<T> {
-        self.0
-    }
-
-    /// Returns a new [`Buffer`] that is a slice of this buffer starting at `offset`.
+    /// Returns a new [`ArrowBuffer`] that is a slice of this buffer starting at `offset`.
     ///
     /// Doing so allows the same memory region to be shared between buffers.
     ///
@@ -100,9 +93,18 @@ impl<T> ArrowBuffer<T> {
     }
 }
 
-impl<T> From<Buffer<T>> for ArrowBuffer<T> {
+impl<T: arrow::datatypes::ArrowNativeType + arrow2::types::NativeType>
+    From<arrow::buffer::ScalarBuffer<T>> for ArrowBuffer<T>
+{
+    #[inline]
+    fn from(value: arrow::buffer::ScalarBuffer<T>) -> Self {
+        Self(value.into_inner().into())
+    }
+}
+
+impl<T> From<arrow2::buffer::Buffer<T>> for ArrowBuffer<T> {
     #[inline]
-    fn from(value: Buffer<T>) -> Self {
+    fn from(value: arrow2::buffer::Buffer<T>) -> Self {
         Self(value)
     }
 }
@@ -123,7 +125,7 @@ impl From<&[T]> for ArrowBuffer {
 
 impl<T> FromIterator<T> for ArrowBuffer<T> {
     fn from_iter<I: IntoIterator<Item = T>>(iter: I) -> Self {
-        Self(Buffer::from_iter(iter))
+        Self(arrow2::buffer::Buffer::from_iter(iter))
     }
 }
diff --git a/crates/store/re_types_core/src/loggable.rs b/crates/store/re_types_core/src/loggable.rs
index 0c42840936d5..e331da70b718 100644
--- a/crates/store/re_types_core/src/loggable.rs
+++ b/crates/store/re_types_core/src/loggable.rs
@@ -16,6 +16,11 @@ use crate::{Archetype, ComponentBatch, LoggableBatch};
 /// Implementing the [`Loggable`] trait automatically derives the [`LoggableBatch`] implementation,
 /// which makes it possible to work with lists' worth of data in a generic fashion.
 pub trait Loggable: 'static + Send + Sync + Clone + Sized + SizeBytes {
+    /// The underlying [`arrow::datatypes::DataType`], excluding datatype extensions.
+    fn arrow_datatype() -> arrow::datatypes::DataType {
+        Self::arrow2_datatype().into()
+    }
+
     /// The underlying [`arrow2::datatypes::DataType`], excluding datatype extensions.
     fn arrow2_datatype() -> arrow2::datatypes::DataType;
 
@@ -25,9 +30,24 @@ pub trait Loggable: 'static + Send + Sync + Clone + Sized + SizeBytes {
     /// When using Rerun's builtin components & datatypes, this can only fail if the data
     /// exceeds the maximum number of entries in an Arrow array (2^31 for standard arrays,
     /// 2^63 for large arrays).
+    fn to_arrow_opt<'a>(
+        data: impl IntoIterator<Item = Option<impl Into<::std::borrow::Cow<'a, Self>>>>,
+    ) -> SerializationResult<arrow::array::ArrayData>
+    where
+        Self: 'a,
+    {
+        Self::to_arrow2_opt(data).map(|array| arrow2::array::to_data(array.as_ref()))
+    }
+
+    /// Given an iterator of options of owned or reference values to the current
+    /// [`Loggable`], serializes them into an Arrow2 array.
+    ///
+    /// When using Rerun's builtin components & datatypes, this can only fail if the data
+    /// exceeds the maximum number of entries in an Arrow2 array (2^31 for standard arrays,
+    /// 2^63 for large arrays).
     fn to_arrow2_opt<'a>(
         data: impl IntoIterator<Item = Option<impl Into<::std::borrow::Cow<'a, Self>>>>,
-    ) -> SerializationResult<Box<dyn ::arrow2::array::Array>>
+    ) -> SerializationResult<Box<dyn arrow2::array::Array>>
     where
         Self: 'a;
 
@@ -40,9 +60,26 @@ pub trait Loggable: 'static + Send + Sync + Clone + Sized + SizeBytes {
     /// exceeds the maximum number of entries in an Arrow array (2^31 for standard arrays,
     /// 2^63 for large arrays).
     #[inline]
+    fn to_arrow<'a>(
+        data: impl IntoIterator<Item = impl Into<::std::borrow::Cow<'a, Self>>>,
+    ) -> SerializationResult<arrow::array::ArrayData>
+    where
+        Self: 'a,
+    {
+        re_tracing::profile_function!();
+        Self::to_arrow_opt(data.into_iter().map(Some))
+    }
+
+    /// Given an iterator of owned or reference values to the current [`Loggable`], serializes
+    /// them into an Arrow2 array.
+    ///
+    /// When using Rerun's builtin components & datatypes, this can only fail if the data
+    /// exceeds the maximum number of entries in an Arrow2 array (2^31 for standard arrays,
+    /// 2^63 for large arrays).
+    #[inline]
     fn to_arrow2<'a>(
         data: impl IntoIterator<Item = impl Into<::std::borrow::Cow<'a, Self>>>,
-    ) -> SerializationResult<Box<dyn ::arrow2::array::Array>>
+    ) -> SerializationResult<Box<dyn arrow2::array::Array>>
     where
         Self: 'a,
     {
@@ -54,7 +91,21 @@ pub trait Loggable: 'static + Send + Sync + Clone + Sized + SizeBytes {
 
     /// Given an Arrow array, deserializes it into a collection of [`Loggable`]s.
     #[inline]
-    fn from_arrow2(data: &dyn ::arrow2::array::Array) -> DeserializationResult<Vec<Self>> {
+    fn from_arrow(data: &arrow::array::ArrayData) -> DeserializationResult<Vec<Self>> {
+        re_tracing::profile_function!();
+        Self::from_arrow_opt(data)?
+            .into_iter()
+            .map(|opt| {
+                opt.ok_or_else(|| crate::DeserializationError::MissingData {
+                    backtrace: _Backtrace::new_unresolved(),
+                })
+            })
+            .collect::<DeserializationResult<Vec<_>>>()
+    }
+
+    /// Given an Arrow2 array, deserializes it into a collection of [`Loggable`]s.
+    #[inline]
+    fn from_arrow2(data: &dyn arrow2::array::Array) -> DeserializationResult<Vec<Self>> {
         re_tracing::profile_function!();
         Self::from_arrow2_opt(data)?
             .into_iter()
@@ -67,8 +118,13 @@ pub trait Loggable: 'static + Send + Sync + Clone + Sized + SizeBytes {
     }
 
     /// Given an Arrow array, deserializes it into a collection of optional [`Loggable`]s.
+    fn from_arrow_opt(data: &arrow::array::ArrayData) -> DeserializationResult<Vec<Option<Self>>> {
+        Self::from_arrow2_opt(arrow2::array::from_data(data).as_ref())
+    }
+
+    /// Given an Arrow2 array, deserializes it into a collection of optional [`Loggable`]s.
     fn from_arrow2_opt(
-        data: &dyn ::arrow2::array::Array,
+        data: &dyn arrow2::array::Array,
     ) -> DeserializationResult<Vec<Option<Self>>> {
         _ = data; // NOTE: do this here to avoid breaking users' autocomplete snippets
         Err(crate::DeserializationError::NotImplemented {
diff --git a/crates/store/re_types_core/src/loggable_batch.rs b/crates/store/re_types_core/src/loggable_batch.rs
index 864ab4f2c06b..43a0c24ea395 100644
--- a/crates/store/re_types_core/src/loggable_batch.rs
+++ b/crates/store/re_types_core/src/loggable_batch.rs
@@ -1,6 +1,6 @@
 use crate::{Component, ComponentName, Loggable, SerializationResult};
 
-use arrow2::array::ListArray as ArrowListArray;
+use arrow2::array::ListArray as Arrow2ListArray;
 
 #[allow(unused_imports)] // used in docstrings
 use crate::Archetype;
@@ -31,12 +31,12 @@ pub trait ComponentBatch: LoggableBatch {
     fn name(&self) -> ComponentName;
 
     /// Serializes the batch into an Arrow list array with a single component per list.
-    fn to_arrow_list_array(&self) -> SerializationResult<ArrowListArray<i32>> {
+    fn to_arrow_list_array(&self) -> SerializationResult<Arrow2ListArray<i32>> {
         let array = self.to_arrow2()?;
         let offsets =
             arrow2::offset::Offsets::try_from_lengths(std::iter::repeat(1).take(array.len()))?;
-        let data_type = ArrowListArray::<i32>::default_datatype(array.data_type().clone());
-        ArrowListArray::<i32>::try_new(data_type, offsets.into(), array.to_boxed(), None)
+        let data_type = Arrow2ListArray::<i32>::default_datatype(array.data_type().clone());
+        Arrow2ListArray::<i32>::try_new(data_type, offsets.into(), array.to_boxed(), None)
             .map_err(|err| err.into())
     }
 }
diff --git a/crates/top/re_sdk/src/recording_stream.rs b/crates/top/re_sdk/src/recording_stream.rs
index 131a2dca6941..7ada4aec7bf7 100644
--- a/crates/top/re_sdk/src/recording_stream.rs
+++ b/crates/top/re_sdk/src/recording_stream.rs
@@ -9,7 +9,7 @@ use crossbeam::channel::{Receiver, Sender};
 use itertools::Either;
 use parking_lot::Mutex;
 
-use arrow2::array::{ListArray as ArrowListArray, PrimitiveArray as ArrowPrimitiveArray};
+use arrow2::array::{ListArray as ArrowListArray, PrimitiveArray as Arrow2PrimitiveArray};
 
 use re_chunk::{Chunk, ChunkBatcher, ChunkBatcherConfig, ChunkBatcherError, PendingRow, RowId};
 use re_chunk::{ChunkError, ChunkId, ComponentName, TimeColumn};
@@ -1541,7 +1541,7 @@ impl RecordingStream {
         let time_timeline = Timeline::log_time();
         let time = TimeInt::new_temporal(Time::now().nanos_since_epoch());
 
-        let repeated_time = ArrowPrimitiveArray::<i64>::from_values(
+        let repeated_time = Arrow2PrimitiveArray::<i64>::from_values(
             std::iter::repeat(time.as_i64()).take(chunk.num_rows()),
         )
         .to(time_timeline.datatype());
@@ -1565,7 +1565,7 @@ impl RecordingStream {
             .tick
             .fetch_add(1, std::sync::atomic::Ordering::Relaxed);
 
-        let repeated_tick = ArrowPrimitiveArray::<i64>::from_values(
+        let repeated_tick = Arrow2PrimitiveArray::<i64>::from_values(
             std::iter::repeat(tick).take(chunk.num_rows()),
         )
         .to(tick_timeline.datatype());
diff --git a/crates/top/rerun/src/commands/rrd/filter.rs b/crates/top/rerun/src/commands/rrd/filter.rs
index fdc2086729f1..53efe7cf9d4e 100644
--- a/crates/top/rerun/src/commands/rrd/filter.rs
+++ b/crates/top/rerun/src/commands/rrd/filter.rs
@@ -186,7 +186,7 @@ impl FilterCommand {
 // ---
 
 use re_sdk::{
-    external::arrow2::{datatypes::Field as ArrowField, datatypes::Schema as ArrowSchema},
+    external::arrow2::{datatypes::Field as ArrowField, datatypes::Schema as Arrow2Schema},
     EntityPath,
 };
 
@@ -204,7 +204,7 @@ fn should_keep_timeline(dropped_timelines: &HashSet<&String>, field: &ArrowField
 
 fn should_keep_entity_path(
     dropped_entity_paths: &HashSet<EntityPath>,
-    schema: &ArrowSchema,
+    schema: &Arrow2Schema,
 ) -> bool {
     let Some(entity_path) = schema
         .metadata
diff --git a/crates/top/rerun_c/src/arrow_utils.rs b/crates/top/rerun_c/src/arrow_utils.rs
index b3f0b15ca81f..cd33891e8aa6 100644
--- a/crates/top/rerun_c/src/arrow_utils.rs
+++ b/crates/top/rerun_c/src/arrow_utils.rs
@@ -12,7 +12,7 @@ pub unsafe fn arrow_array_from_c_ffi(
     array: &arrow2::ffi::ArrowArray,
     datatype: arrow2::datatypes::DataType,
 ) -> Result<Box<dyn arrow2::array::Array>, CError> {
-    // Arrow2 implements drop for ArrowArray and ArrowSchema.
+    // Arrow2 implements drop for Arrow2Array and Arrow2Schema.
     //
     // Therefore, for things to work correctly we have to take ownership of the array!
     // All methods passing arrow arrays through our C interface are documented to take ownership of the component batch.
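
For illustration (not part of the diff): the `Loggable` additions above make the new arrow entry points thin adapters over the existing arrow2 ones — serialization goes `to_arrow` → `to_arrow_opt` → `to_arrow2_opt` → `arrow2::array::to_data`, and deserialization mirrors that through `arrow2::array::from_data`. A minimal sketch of the resulting round-trip, assuming a generated builtin component such as `re_types::components::Color` (with its usual `Clone`/`PartialEq` impls and generated `Cow` conversions) and `anyhow` for error handling:

use re_types::components::Color;
use re_types_core::Loggable as _;

fn roundtrip_through_arrow() -> anyhow::Result<()> {
    let colors = vec![Color::from_rgb(255, 0, 0), Color::from_rgb(0, 255, 0)];

    // New arrow-rs path: serialize straight to `arrow::array::ArrayData`…
    let data = Color::to_arrow(colors.clone())?;

    // …which is exactly what the arrow2 path yields after `to_data`:
    let arrow2_array = Color::to_arrow2(colors.clone())?;
    assert_eq!(data, arrow2::array::to_data(arrow2_array.as_ref()));

    // Deserialization takes the mirror route through `arrow2::array::from_data`:
    let roundtripped = Color::from_arrow(&data)?;
    assert_eq!(colors, roundtripped);

    Ok(())
}
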
diff --git a/crates/viewer/re_chunk_store_ui/src/chunk_ui.rs b/crates/viewer/re_chunk_store_ui/src/chunk_ui.rs
index d2a48ffa642e..5017d683b534 100644
--- a/crates/viewer/re_chunk_store_ui/src/chunk_ui.rs
+++ b/crates/viewer/re_chunk_store_ui/src/chunk_ui.rs
@@ -4,7 +4,7 @@ use std::sync::Arc;
 use egui_extras::{Column, TableRow};
 use itertools::Itertools;
 
-use re_chunk_store::external::re_chunk::{ArrowArray, TransportChunk};
+use re_chunk_store::external::re_chunk::{Arrow2Array, TransportChunk};
 use re_chunk_store::Chunk;
 use re_log_types::external::re_types_core::SizeBytes;
 use re_log_types::{TimeZone, Timeline};
diff --git a/crates/viewer/re_selection_panel/src/visualizer_ui.rs b/crates/viewer/re_selection_panel/src/visualizer_ui.rs
index 858551140af8..c6e4711f8927 100644
--- a/crates/viewer/re_selection_panel/src/visualizer_ui.rs
+++ b/crates/viewer/re_selection_panel/src/visualizer_ui.rs
@@ -160,7 +160,7 @@ fn visualizer_components(
     fn non_empty_component_batch_raw(
         unit: Option<&UnitChunkShared>,
         component_name: &ComponentName,
-    ) -> Option<(Option<RowId>, Box<dyn ArrowArray>)> {
+    ) -> Option<(Option<RowId>, Box<dyn Arrow2Array>)> {
         let unit = unit?;
         let batch = unit.component_batch_raw(component_name)?;
         if batch.is_empty() {
diff --git a/crates/viewer/re_space_view/src/query.rs b/crates/viewer/re_space_view/src/query.rs
index 5cd32417210b..4c41a2274d89 100644
--- a/crates/viewer/re_space_view/src/query.rs
+++ b/crates/viewer/re_space_view/src/query.rs
@@ -4,7 +4,7 @@ use crate::{
     results_ext::{HybridLatestAtResults, HybridRangeResults},
     HybridResults,
 };
-use re_chunk_store::{external::re_chunk::ArrowArray, LatestAtQuery, RangeQuery, RowId};
+use re_chunk_store::{external::re_chunk::Arrow2Array, LatestAtQuery, RangeQuery, RowId};
 use re_log_types::{TimeInt, Timeline};
 use re_query::LatestAtResults;
 use re_types_core::{Archetype, ComponentName};
@@ -245,7 +245,7 @@ pub trait DataResultQuery {
         query_ctx: &'a QueryContext<'a>,
         visualizer_collection: &'a re_viewer_context::VisualizerCollection,
         component: re_types_core::ComponentName,
-    ) -> Box<dyn ArrowArray>;
+    ) -> Box<dyn Arrow2Array>;
 }
 
 impl DataResultQuery for DataResult {
@@ -285,7 +285,7 @@ impl DataResultQuery for DataResult {
         query_ctx: &'a QueryContext<'a>,
         visualizer_collection: &'a re_viewer_context::VisualizerCollection,
         component: re_types_core::ComponentName,
-    ) -> Box<dyn ArrowArray> {
+    ) -> Box<dyn Arrow2Array> {
         // TODO(jleibs): This should be cached somewhere
         for vis in &self.visualizers {
             let Ok(vis) = visualizer_collection.get_by_identifier(*vis) else {
diff --git a/crates/viewer/re_space_view/src/results_ext.rs b/crates/viewer/re_space_view/src/results_ext.rs
index c894e27c7fa8..8f0da5517cec 100644
--- a/crates/viewer/re_space_view/src/results_ext.rs
+++ b/crates/viewer/re_space_view/src/results_ext.rs
@@ -4,7 +4,7 @@ use std::sync::Arc;
 use itertools::Itertools as _;
 
 use re_chunk_store::{Chunk, LatestAtQuery, RangeQuery, UnitChunkShared};
-use re_log_types::external::arrow2::array::Array as ArrowArray;
+use re_log_types::external::arrow2::array::Array as Arrow2Array;
 use re_log_types::hash::Hash64;
 use re_query::{LatestAtResults, RangeResults};
 use re_types_core::ComponentName;
@@ -50,7 +50,7 @@ impl<'a> HybridLatestAtResults<'a> {
             .or_else(|| self.defaults.get(&component_name))
     }
 
-    pub fn fallback_raw(&self, component_name: ComponentName) -> Box<dyn ArrowArray> {
+    pub fn fallback_raw(&self, component_name: ComponentName) -> Box<dyn Arrow2Array> {
         let query_context = QueryContext {
             viewer_ctx: self.ctx.viewer_ctx,
             target_entity_path: &self.data_result.entity_path,
diff --git a/crates/viewer/re_space_view/src/view_property_ui.rs b/crates/viewer/re_space_view/src/view_property_ui.rs
index 1d976fda61d6..5136494bc590 100644
--- a/crates/viewer/re_space_view/src/view_property_ui.rs
+++ b/crates/viewer/re_space_view/src/view_property_ui.rs
@@ -1,4 +1,4 @@
-use re_chunk_store::{external::re_chunk::ArrowArray, RowId};
+use re_chunk_store::{external::re_chunk::Arrow2Array, RowId};
 use re_types_core::{
     reflection::{ArchetypeFieldReflection, ArchetypeReflection},
     Archetype, ArchetypeName, ArchetypeReflectionMarker, ComponentName,
@@ -120,7 +120,7 @@ fn view_property_component_ui(
     field: &ArchetypeFieldReflection,
     blueprint_path: &re_log_types::EntityPath,
     row_id: Option<RowId>,
-    component_array: Option<&dyn ArrowArray>,
+    component_array: Option<&dyn Arrow2Array>,
     fallback_provider: &dyn ComponentFallbackProvider,
 ) {
     let singleline_list_item_content = singleline_list_item_content(
@@ -179,7 +179,7 @@ fn menu_more(
     ui: &mut egui::Ui,
     blueprint_path: &re_log_types::EntityPath,
     component_name: ComponentName,
-    component_array: Option<&dyn ArrowArray>,
+    component_array: Option<&dyn Arrow2Array>,
 ) {
     let property_differs_from_default = component_array
         != ctx
@@ -228,7 +228,7 @@ fn singleline_list_item_content<'a>(
     blueprint_path: &'a re_log_types::EntityPath,
     component_name: ComponentName,
     row_id: Option<RowId>,
-    component_array: Option<&'a dyn ArrowArray>,
+    component_array: Option<&'a dyn Arrow2Array>,
     fallback_provider: &'a dyn ComponentFallbackProvider,
 ) -> list_item::PropertyContent<'a> {
     list_item::PropertyContent::new(display_name)
diff --git a/crates/viewer/re_space_view_dataframe/src/dataframe_ui.rs b/crates/viewer/re_space_view_dataframe/src/dataframe_ui.rs
index 64c2b3a3d570..c0d65cb8bb1c 100644
--- a/crates/viewer/re_space_view_dataframe/src/dataframe_ui.rs
+++ b/crates/viewer/re_space_view_dataframe/src/dataframe_ui.rs
@@ -5,7 +5,7 @@ use anyhow::Context;
 use egui::NumExt as _;
 use itertools::Itertools;
 
-use re_chunk_store::external::re_chunk::ArrowArray;
+use re_chunk_store::external::re_chunk::Arrow2Array;
 use re_chunk_store::{ColumnDescriptor, LatestAtQuery};
 use re_dataframe::external::re_query::StorageEngineArcReadGuard;
 use re_dataframe::QueryHandle;
@@ -142,7 +142,7 @@ struct RowsDisplayData {
 impl RowsDisplayData {
     fn try_new(
         row_indices: &Range<u64>,
-        row_data: Vec<Vec<Box<dyn ArrowArray>>>,
+        row_data: Vec<Vec<Box<dyn Arrow2Array>>>,
         selected_columns: &[ColumnDescriptor],
         query_timeline: &Timeline,
     ) -> Result<Self, DisplayRecordBatchError> {
diff --git a/crates/viewer/re_space_view_dataframe/src/display_record_batch.rs b/crates/viewer/re_space_view_dataframe/src/display_record_batch.rs
index 36763ddc2968..e8217b0ca165 100644
--- a/crates/viewer/re_space_view_dataframe/src/display_record_batch.rs
+++ b/crates/viewer/re_space_view_dataframe/src/display_record_batch.rs
@@ -5,11 +5,11 @@ use thiserror::Error;
 
 use re_chunk_store::external::arrow2::{
     array::{
-        Array as ArrowArray, DictionaryArray as ArrowDictionaryArray, ListArray as ArrowListArray,
-        PrimitiveArray as ArrowPrimitiveArray,
+        Array as Arrow2Array, DictionaryArray as Arrow2DictionaryArray,
+        ListArray as Arrow2ListArray, PrimitiveArray as Arrow2PrimitiveArray,
     },
     datatypes::DataType,
-    datatypes::DataType as ArrowDataType,
+    datatypes::DataType as Arrow2DataType,
 };
 use re_chunk_store::{ColumnDescriptor, ComponentColumnDescriptor, LatestAtQuery};
 use re_log_types::{EntityPath, TimeInt, Timeline};
@@ -21,10 +21,10 @@ use re_viewer_context::{UiLayout, ViewerContext};
 #[derive(Error, Debug)]
 pub(crate) enum DisplayRecordBatchError {
     #[error("Unexpected column data type for timeline '{0}': {1:?}")]
-    UnexpectedTimeColumnDataType(String, ArrowDataType),
+    UnexpectedTimeColumnDataType(String, Arrow2DataType),
 
     #[error("Unexpected column data type for component '{0}': {1:?}")]
-    UnexpectedComponentColumnDataType(String, ArrowDataType),
+    UnexpectedComponentColumnDataType(String, Arrow2DataType),
 }
 
 /// A single column of component data.
@@ -33,10 +33,10 @@ pub(crate) enum ComponentData {
 #[derive(Debug)]
 pub(crate) enum ComponentData {
     Null,
-    ListArray(ArrowListArray<i32>),
+    ListArray(Arrow2ListArray<i32>),
     DictionaryArray {
-        dict: ArrowDictionaryArray<i32>,
-        values: ArrowListArray<i32>,
+        dict: Arrow2DictionaryArray<i32>,
+        values: Arrow2ListArray<i32>,
     },
 }
 
@@ -44,27 +44,27 @@ impl ComponentData {
     #[allow(clippy::borrowed_box)] // https://github.com/rust-lang/rust-clippy/issues/11940
     fn try_new(
         descriptor: &ComponentColumnDescriptor,
-        column_data: &Box<dyn ArrowArray>,
+        column_data: &Box<dyn Arrow2Array>,
     ) -> Result<Self, DisplayRecordBatchError> {
         match column_data.data_type() {
             DataType::Null => Ok(Self::Null),
             DataType::List(_) => Ok(Self::ListArray(
                 column_data
                     .as_any()
-                    .downcast_ref::<ArrowListArray<i32>>()
+                    .downcast_ref::<Arrow2ListArray<i32>>()
                     .expect("`data_type` checked, failure is a bug in re_dataframe")
                     .clone(),
             )),
             DataType::Dictionary(IntegerType::Int32, _, _) => {
                 let dict = column_data
                     .as_any()
-                    .downcast_ref::<ArrowDictionaryArray<i32>>()
+                    .downcast_ref::<Arrow2DictionaryArray<i32>>()
                     .expect("`data_type` checked, failure is a bug in re_dataframe")
                     .clone();
                 let values = dict
                     .values()
                     .as_any()
-                    .downcast_ref::<ArrowListArray<i32>>()
+                    .downcast_ref::<Arrow2ListArray<i32>>()
                     .expect("`data_type` checked, failure is a bug in re_dataframe")
                     .clone();
                 Ok(Self::DictionaryArray { dict, values })
@@ -167,7 +167,7 @@ impl ComponentData {
 pub(crate) enum DisplayColumn {
     Timeline {
         timeline: Timeline,
-        time_data: ArrowPrimitiveArray<i64>,
+        time_data: Arrow2PrimitiveArray<i64>,
     },
     Component {
         entity_path: EntityPath,
@@ -180,13 +180,13 @@ impl DisplayColumn {
     #[allow(clippy::borrowed_box)] // https://github.com/rust-lang/rust-clippy/issues/11940
     fn try_new(
         column_descriptor: &ColumnDescriptor,
-        column_data: &Box<dyn ArrowArray>,
+        column_data: &Box<dyn Arrow2Array>,
     ) -> Result<Self, DisplayRecordBatchError> {
         match column_descriptor {
             ColumnDescriptor::Time(desc) => {
                 let time_data = column_data
                     .as_any()
-                    .downcast_ref::<ArrowPrimitiveArray<i64>>()
+                    .downcast_ref::<Arrow2PrimitiveArray<i64>>()
                     .ok_or_else(|| {
                         DisplayRecordBatchError::UnexpectedTimeColumnDataType(
                             desc.timeline.name().as_str().to_owned(),
@@ -305,7 +305,7 @@ impl DisplayRecordBatch {
     /// The columns in the record batch must match the selected columns. This is guaranteed by
     /// `re_datastore`.
     pub(crate) fn try_new(
-        row_data: &Vec<Box<dyn ArrowArray>>,
+        row_data: &Vec<Box<dyn Arrow2Array>>,
         selected_columns: &[ColumnDescriptor],
     ) -> Result<Self, DisplayRecordBatchError> {
         let num_rows = row_data.first().map(|arr| arr.len()).unwrap_or(0);
diff --git a/crates/viewer/re_space_view_time_series/src/line_visualizer_system.rs b/crates/viewer/re_space_view_time_series/src/line_visualizer_system.rs
index bfe8ac821844..47af47be660d 100644
--- a/crates/viewer/re_space_view_time_series/src/line_visualizer_system.rs
+++ b/crates/viewer/re_space_view_time_series/src/line_visualizer_system.rs
@@ -5,7 +5,7 @@ use re_log_types::{EntityPath, TimeInt};
 use re_space_view::range_with_blueprint_resolved_data;
 use re_types::archetypes;
 use re_types::components::{AggregationPolicy, ClearIsRecursive};
-use re_types::external::arrow2::datatypes::DataType as ArrowDatatype;
+use re_types::external::arrow2::datatypes::DataType as Arrow2Datatype;
 use re_types::{
     archetypes::SeriesLine,
     components::{Color, Name, Scalar, StrokeWidth},
@@ -240,7 +240,7 @@ impl SeriesLineSystem {
                 chunk.iter_component_indices(&query.timeline(), &Scalar::name())
             })
             .map(|(data_time, _)| {
-                debug_assert_eq!(Scalar::arrow2_datatype(), ArrowDatatype::Float64);
+                debug_assert_eq!(Scalar::arrow2_datatype(), Arrow2Datatype::Float64);
 
                 PlotPoint {
                     time: data_time.as_i64(),
@@ -254,7 +254,7 @@ impl SeriesLineSystem {
         {
             re_tracing::profile_scope!("fill values");
 
-            debug_assert_eq!(Scalar::arrow2_datatype(), ArrowDatatype::Float64);
+            debug_assert_eq!(Scalar::arrow2_datatype(), Arrow2Datatype::Float64);
             all_scalar_chunks
                 .iter()
                 .flat_map(|chunk| chunk.iter_primitive::<f64>(&Scalar::name()))
@@ -278,7 +278,7 @@ impl SeriesLineSystem {
         {
             re_tracing::profile_scope!("fill colors");
 
-            debug_assert_eq!(Color::arrow2_datatype(), ArrowDatatype::UInt32);
+            debug_assert_eq!(Color::arrow2_datatype(), Arrow2Datatype::UInt32);
 
             fn map_raw_color(raw: &[u32]) -> Option<re_renderer::Color32> {
                 raw.first().map(|c| {
@@ -331,7 +331,7 @@ impl SeriesLineSystem {
         {
             re_tracing::profile_scope!("fill stroke widths");
 
-            debug_assert_eq!(StrokeWidth::arrow2_datatype(), ArrowDatatype::Float32);
+            debug_assert_eq!(StrokeWidth::arrow2_datatype(), Arrow2Datatype::Float32);
 
             {
                 let all_stroke_width_chunks = results.get_optional_chunks(&StrokeWidth::name());
diff --git a/crates/viewer/re_space_view_time_series/src/point_visualizer_system.rs b/crates/viewer/re_space_view_time_series/src/point_visualizer_system.rs
index 960c7596e6f0..c0a9f2aedf78 100644
--- a/crates/viewer/re_space_view_time_series/src/point_visualizer_system.rs
+++ b/crates/viewer/re_space_view_time_series/src/point_visualizer_system.rs
@@ -4,7 +4,7 @@ use re_space_view::range_with_blueprint_resolved_data;
 use re_types::{
     archetypes::{self, SeriesPoint},
     components::{Color, MarkerShape, MarkerSize, Name, Scalar},
-    external::arrow2::datatypes::DataType as ArrowDatatype,
+    external::arrow2::datatypes::DataType as Arrow2Datatype,
     Archetype as _, Component as _, Loggable as _,
 };
 use re_viewer_context::{
@@ -254,7 +254,7 @@ impl SeriesPointSystem {
                         chunk.iter_component_indices(&query.timeline(), &Scalar::name())
                     })
                     .map(|(data_time, _)| {
-                        debug_assert_eq!(Scalar::arrow2_datatype(), ArrowDatatype::Float64);
+                        debug_assert_eq!(Scalar::arrow2_datatype(), Arrow2Datatype::Float64);
 
                         PlotPoint {
                             time: data_time.as_i64(),
@@ -268,7 +268,7 @@ impl SeriesPointSystem {
             {
                 re_tracing::profile_scope!("fill values");
 
-                debug_assert_eq!(Scalar::arrow2_datatype(), ArrowDatatype::Float64);
+                debug_assert_eq!(Scalar::arrow2_datatype(), Arrow2Datatype::Float64);
                 let mut i = 0;
                 all_scalar_chunks
                     .iter()
@@ -294,7 +294,7 @@ impl SeriesPointSystem {
             {
                 re_tracing::profile_scope!("fill colors");
 
-                debug_assert_eq!(Color::arrow2_datatype(), ArrowDatatype::UInt32);
+                debug_assert_eq!(Color::arrow2_datatype(), Arrow2Datatype::UInt32);
 
                 fn map_raw_color(raw: &[u32]) -> Option<re_renderer::Color32> {
                     raw.first().map(|c| {
@@ -348,7 +348,7 @@ impl SeriesPointSystem {
             {
                 re_tracing::profile_scope!("fill marker sizes");
 
-                debug_assert_eq!(MarkerSize::arrow2_datatype(), ArrowDatatype::Float32);
+                debug_assert_eq!(MarkerSize::arrow2_datatype(), Arrow2Datatype::Float32);
 
                 {
                     let all_marker_size_chunks = results.get_optional_chunks(&MarkerSize::name());
diff --git a/crates/viewer/re_viewer_context/src/blueprint_helpers.rs b/crates/viewer/re_viewer_context/src/blueprint_helpers.rs
index e2612d9871ad..d447a74a811e 100644
--- a/crates/viewer/re_viewer_context/src/blueprint_helpers.rs
+++ b/crates/viewer/re_viewer_context/src/blueprint_helpers.rs
@@ -1,4 +1,4 @@
-use re_chunk::{ArrowArray, RowId};
+use re_chunk::{Arrow2Array, RowId};
 use re_chunk_store::external::re_chunk::Chunk;
 use re_log_types::{EntityPath, TimeInt, TimePoint, Timeline};
 use re_types::{AsComponents, ComponentBatch, ComponentName};
@@ -86,7 +86,7 @@ impl ViewerContext<'_> {
         &self,
         entity_path: &EntityPath,
         component_name: ComponentName,
-        array: Box<dyn ArrowArray>,
+        array: Box<dyn Arrow2Array>,
     ) {
         let timepoint = self.store_context.blueprint_timepoint_for_writes();
 
@@ -122,7 +122,7 @@ impl ViewerContext<'_> {
         &self,
         entity_path: &EntityPath,
         component_name: ComponentName,
-    ) -> Option<Box<dyn ArrowArray>> {
+    ) -> Option<Box<dyn Arrow2Array>> {
         self.store_context
             .default_blueprint
             .and_then(|default_blueprint| {
diff --git a/crates/viewer/re_viewer_context/src/component_ui_registry.rs b/crates/viewer/re_viewer_context/src/component_ui_registry.rs
index 6898c448451c..d6a37d5d1020 100644
--- a/crates/viewer/re_viewer_context/src/component_ui_registry.rs
+++ b/crates/viewer/re_viewer_context/src/component_ui_registry.rs
@@ -1,6 +1,6 @@
 use std::collections::BTreeMap;
 
-use re_chunk::{ArrowArray, RowId, UnitChunkShared};
+use re_chunk::{Arrow2Array, RowId, UnitChunkShared};
 use re_chunk_store::LatestAtQuery;
 use re_entity_db::{EntityDb, EntityPath};
 use re_log::ResultExt;
@@ -522,7 +522,7 @@ impl ComponentUiRegistry {
         blueprint_write_path: &EntityPath,
         component_name: ComponentName,
         row_id: Option<RowId>,
-        component_array: Option<&dyn ArrowArray>,
+        component_array: Option<&dyn Arrow2Array>,
         fallback_provider: &dyn ComponentFallbackProvider,
     ) {
         let multiline = true;
@@ -553,7 +553,7 @@ impl ComponentUiRegistry {
         blueprint_write_path: &EntityPath,
         component_name: ComponentName,
         row_id: Option<RowId>,
-        component_query_result: Option<&dyn ArrowArray>,
+        component_query_result: Option<&dyn Arrow2Array>,
         fallback_provider: &dyn ComponentFallbackProvider,
     ) {
         let multiline = false;
@@ -579,7 +579,7 @@ impl ComponentUiRegistry {
         blueprint_write_path: &EntityPath,
         component_name: ComponentName,
         row_id: Option<RowId>,
-        component_array: Option<&dyn ArrowArray>,
+        component_array: Option<&dyn Arrow2Array>,
         fallback_provider: &dyn ComponentFallbackProvider,
         allow_multiline: bool,
     ) {
diff --git a/crates/viewer/re_viewer_context/src/space_view/view_context.rs b/crates/viewer/re_viewer_context/src/space_view/view_context.rs
index 6eb27627f06b..6ed6a3e30595 100644
--- a/crates/viewer/re_viewer_context/src/space_view/view_context.rs
+++ b/crates/viewer/re_viewer_context/src/space_view/view_context.rs
@@ -1,6 +1,6 @@
 use std::sync::Arc;
 
-use re_chunk::ArrowArray;
+use re_chunk::Arrow2Array;
 use re_chunk_store::LatestAtQuery;
 use re_log_types::{EntityPath, TimePoint};
 use re_query::StorageEngineReadGuard;
@@ -107,7 +107,7 @@ impl<'a> ViewContext<'a> {
         &self,
         entity_path: &EntityPath,
         component_name: ComponentName,
-        array: Box<dyn ArrowArray>,
+        array: Box<dyn Arrow2Array>,
     ) {
         self.viewer_ctx
             .save_blueprint_array(entity_path, component_name, array);
diff --git a/crates/viewer/re_viewer_context/src/viewer_context.rs b/crates/viewer/re_viewer_context/src/viewer_context.rs
index fc8073df28b1..87ca4c28e453 100644
--- a/crates/viewer/re_viewer_context/src/viewer_context.rs
+++ b/crates/viewer/re_viewer_context/src/viewer_context.rs
@@ -176,7 +176,7 @@ impl<'a> ViewerContext<'a> {
     pub fn placeholder_for(
         &self,
         component: re_chunk::ComponentName,
-    ) -> Box<dyn re_chunk::ArrowArray> {
+    ) -> Box<dyn re_chunk::Arrow2Array> {
         let datatype = if let Some(reflection) = self.reflection.components.get(&component) {
             // It's a builtin type with reflection. We either have custom place holder, or can rely on the known datatype.
             if let Some(placeholder) = reflection.custom_placeholder.as_ref() {
diff --git a/rerun_py/src/python_bridge.rs b/rerun_py/src/python_bridge.rs
index 2d2e561e1429..bae0810496c6 100644
--- a/rerun_py/src/python_bridge.rs
+++ b/rerun_py/src/python_bridge.rs
@@ -1136,7 +1136,7 @@ fn log_arrow_msg(
/// ------
/// entity_path: `str`
///     The entity path to log the chunk to.
-/// timelines: `Dict[str, ArrowPrimitiveArray]`
+/// timelines: `Dict[str, Arrow2PrimitiveArray]`
///     A dictionary mapping timeline names to their values.
/// components: `Dict[str, ArrowListArray]`
///     A dictionary mapping component names to their values.
diff --git a/rerun_py/src/remote.rs b/rerun_py/src/remote.rs
index ca71c68a6be8..87db9ca7bf59 100644
--- a/rerun_py/src/remote.rs
+++ b/rerun_py/src/remote.rs
@@ -156,7 +156,7 @@ enum MetadataLike {
 }
 
 impl MetadataLike {
-    fn to_arrow2(&self) -> PyResult<Box<dyn arrow2::array::Array>> {
         match self {
             Self::PyArrow(array) => {
                 let array = arrow2::array::from_data(&array.0);
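
For reference (not part of the diff): the same `ArrayData` bridge shows up outside `Loggable` too. `MetadataLike::to_arrow2` above turns a pyarrow-backed arrow-rs array into an arrow2 one, and the new `From<arrow::buffer::ScalarBuffer<T>>` impl gives `ArrowBuffer` a copy-free path from arrow-rs buffers. A rough sketch, assuming arrow2 is built with its `arrow` feature (enabled in `re_types_core/Cargo.toml` earlier in this changeset) and that the `ScalarBuffer` conversion is wired up as reconstructed above:

use arrow::array::{Array as _, Int32Array};
use arrow::buffer::ScalarBuffer;
use arrow2::array::Array as _;

fn bridge_arrow_to_arrow2() {
    // arrow-rs -> arrow2, through the shared `ArrayData` representation:
    let arrow1 = Int32Array::from(vec![1, 2, 3]);
    let arrow2_array: Box<dyn arrow2::array::Array> = arrow2::array::from_data(&arrow1.to_data());
    assert_eq!(arrow2_array.len(), 3);

    // arrow-rs scalar buffer -> ArrowBuffer, via the `From` impl added above;
    // `ScalarBuffer::into_inner` yields an `arrow::buffer::Buffer`, which then
    // converts into an `arrow2::buffer::Buffer<i64>` without copying the data:
    let scalars = ScalarBuffer::<i64>::from(vec![4, 5, 6]);
    let _buffer: re_types_core::ArrowBuffer<i64> = scalars.into();
}
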