diff --git a/crates/store/re_chunk/src/batcher.rs b/crates/store/re_chunk/src/batcher.rs
index f74f4dbfa221..f155fa5a0802 100644
--- a/crates/store/re_chunk/src/batcher.rs
+++ b/crates/store/re_chunk/src/batcher.rs
@@ -799,7 +799,7 @@ impl PendingRow {
         re_tracing::profile_scope!("iterate per timeline set");

         // Then we split the micro batches even further -- one sub-batch per unique set of datatypes.
-        let mut per_datatype_set: IntMap<u64 /* ArrowDatatype set */, Vec<PendingRow>> =
+        let mut per_datatype_set: IntMap<u64 /* Arrow2Datatype set */, Vec<PendingRow>> =
             Default::default();
         {
             re_tracing::profile_scope!("compute datatype sets");
diff --git a/crates/store/re_chunk/src/transport.rs b/crates/store/re_chunk/src/transport.rs
index 57d0a70a063f..3b49d06d58c0 100644
--- a/crates/store/re_chunk/src/transport.rs
+++ b/crates/store/re_chunk/src/transport.rs
@@ -7,7 +7,7 @@ use arrow2::{
     },
     chunk::Chunk as Arrow2Chunk,
     datatypes::{
-        DataType as ArrowDatatype, Field as ArrowField, Metadata as ArrowMetadata,
+        DataType as Arrow2Datatype, Field as ArrowField, Metadata as Arrow2Metadata,
         Schema as Arrow2Schema, TimeUnit as ArrowTimeUnit,
     },
 };
@@ -100,7 +100,7 @@ impl TransportChunk {
     /// Returns the appropriate chunk-level [`Arrow2Schema`] metadata for a Rerun [`ChunkId`].
     #[inline]
-    pub fn chunk_metadata_id(id: ChunkId) -> ArrowMetadata {
+    pub fn chunk_metadata_id(id: ChunkId) -> Arrow2Metadata {
         [
             (
                 Self::CHUNK_METADATA_KEY_ID.to_owned(),
@@ -112,7 +112,7 @@ impl TransportChunk {
     /// Returns the appropriate chunk-level [`Arrow2Schema`] metadata for the in-memory size in bytes.
     #[inline]
-    pub fn chunk_metadata_heap_size_bytes(heap_size_bytes: u64) -> ArrowMetadata {
+    pub fn chunk_metadata_heap_size_bytes(heap_size_bytes: u64) -> Arrow2Metadata {
         [
             (
                 Self::CHUNK_METADATA_KEY_HEAP_SIZE_BYTES.to_owned(),
@@ -124,7 +124,7 @@ impl TransportChunk {
     /// Returns the appropriate chunk-level [`Arrow2Schema`] metadata for a Rerun [`EntityPath`].
     #[inline]
-    pub fn chunk_metadata_entity_path(entity_path: &EntityPath) -> ArrowMetadata {
+    pub fn chunk_metadata_entity_path(entity_path: &EntityPath) -> Arrow2Metadata {
         [
             (
                 Self::CHUNK_METADATA_KEY_ENTITY_PATH.to_owned(),
@@ -136,7 +136,7 @@ impl TransportChunk {
     /// Returns the appropriate chunk-level [`Arrow2Schema`] metadata for an `IS_SORTED` marker.
     #[inline]
-    pub fn chunk_metadata_is_sorted() -> ArrowMetadata {
+    pub fn chunk_metadata_is_sorted() -> Arrow2Metadata {
         [
             (
                 Self::CHUNK_METADATA_MARKER_IS_SORTED_BY_ROW_ID.to_owned(),
@@ -148,7 +148,7 @@ impl TransportChunk {
     /// Returns the appropriate field-level [`Arrow2Schema`] metadata for a Rerun time column.
     #[inline]
-    pub fn field_metadata_time_column() -> ArrowMetadata {
+    pub fn field_metadata_time_column() -> Arrow2Metadata {
         [
             (
                 Self::FIELD_METADATA_KEY_KIND.to_owned(),
@@ -160,7 +160,7 @@ impl TransportChunk {
     /// Returns the appropriate field-level [`Arrow2Schema`] metadata for a Rerun control column.
     #[inline]
-    pub fn field_metadata_control_column() -> ArrowMetadata {
+    pub fn field_metadata_control_column() -> Arrow2Metadata {
         [
             (
                 Self::FIELD_METADATA_KEY_KIND.to_owned(),
@@ -172,7 +172,7 @@ impl TransportChunk {
     /// Returns the appropriate field-level [`Arrow2Schema`] metadata for a Rerun data column.
     #[inline]
-    pub fn field_metadata_data_column() -> ArrowMetadata {
+    pub fn field_metadata_data_column() -> Arrow2Metadata {
         [
             (
                 Self::FIELD_METADATA_KEY_KIND.to_owned(),
@@ -184,7 +184,7 @@ impl TransportChunk {
     /// Returns the appropriate field-level [`Arrow2Schema`] metadata for an `IS_SORTED` marker.
     #[inline]
-    pub fn field_metadata_is_sorted() -> ArrowMetadata {
+    pub fn field_metadata_is_sorted() -> Arrow2Metadata {
         [
             (
                 Self::FIELD_METADATA_MARKER_IS_SORTED_BY_TIME.to_owned(),
@@ -492,8 +492,8 @@ impl Chunk {
         for (field, column) in transport.timelines() {
             // See also [`Timeline::datatype`]
             let timeline = match column.data_type().to_logical_type() {
-                ArrowDatatype::Int64 => Timeline::new_sequence(field.name.as_str()),
-                ArrowDatatype::Timestamp(ArrowTimeUnit::Nanosecond, None) => {
+                Arrow2Datatype::Int64 => Timeline::new_sequence(field.name.as_str()),
+                Arrow2Datatype::Timestamp(ArrowTimeUnit::Nanosecond, None) => {
                     Timeline::new_temporal(field.name.as_str())
                 }
                 _ => {
diff --git a/crates/store/re_chunk/src/util.rs b/crates/store/re_chunk/src/util.rs
index 32004a473ecc..c8e64b143c03 100644
--- a/crates/store/re_chunk/src/util.rs
+++ b/crates/store/re_chunk/src/util.rs
@@ -1,11 +1,11 @@
 use arrow2::{
     array::{
-        Array as Arrow2Array, BooleanArray as ArrowBooleanArray,
+        Array as Arrow2Array, BooleanArray as Arrow2BooleanArray,
         DictionaryArray as ArrowDictionaryArray, ListArray as ArrowListArray,
-        PrimitiveArray as ArrowPrimitiveArray,
+        PrimitiveArray as Arrow2PrimitiveArray,
     },
     bitmap::Bitmap as ArrowBitmap,
-    datatypes::DataType as ArrowDatatype,
+    datatypes::DataType as Arrow2Datatype,
     offset::Offsets as ArrowOffsets,
 };
 use itertools::Itertools;
@@ -59,7 +59,7 @@ pub fn arrays_to_list_array_opt(
 ///
 /// Returns an empty list if `arrays` is empty.
 pub fn arrays_to_list_array(
-    array_datatype: ArrowDatatype,
+    array_datatype: Arrow2Datatype,
     arrays: &[Option<&dyn Arrow2Array>],
 ) -> Option<ArrowListArray<i32>> {
     let arrays_dense = arrays.iter().flatten().copied().collect_vec();
@@ -109,7 +109,7 @@ pub fn arrays_to_list_array(
 // TODO(cmc): A possible improvement would be to pick the smallest key datatype possible based
 // on the cardinality of the input arrays.
 pub fn arrays_to_dictionary(
-    array_datatype: &ArrowDatatype,
+    array_datatype: &Arrow2Datatype,
     arrays: &[Option<(Idx, &dyn Arrow2Array)>],
 ) -> Option<ArrowDictionaryArray<i32>> {
     // Dedupe the input arrays based on the given primary key.
@@ -162,7 +162,7 @@ pub fn arrays_to_dictionary(
         ArrowListArray::<i32>::new(array_datatype.clone(), offsets.into(), values, None).to_boxed()
     };

-    let datatype = ArrowDatatype::Dictionary(
+    let datatype = Arrow2Datatype::Dictionary(
         arrow2::datatypes::IntegerType::Int32,
         std::sync::Arc::new(array_datatype.clone()),
         true, // is_sorted
@@ -172,7 +172,7 @@ pub fn arrays_to_dictionary(
     // unique values.
     ArrowDictionaryArray::try_new(
         datatype,
-        ArrowPrimitiveArray::<i32>::from(keys),
+        Arrow2PrimitiveArray::<i32>::from(keys),
         data.to_boxed(),
     )
     .ok()
@@ -310,7 +310,10 @@ pub fn pad_list_array_front(
 /// Returns a new [`ArrowListArray`] with len `entries`.
 ///
 /// Each entry will be an empty array of the given `child_datatype`.
-pub fn new_list_array_of_empties(child_datatype: ArrowDatatype, len: usize) -> ArrowListArray<i32> {
+pub fn new_list_array_of_empties(
+    child_datatype: Arrow2Datatype,
+    len: usize,
+) -> ArrowListArray<i32> {
     let empty_array = arrow2::array::new_empty_array(child_datatype);

     #[allow(clippy::unwrap_used)] // yes, these are indeed lengths
@@ -352,7 +355,7 @@ pub fn concat_arrays(arrays: &[&dyn Arrow2Array]) -> arrow2::error::Result<Box<dyn Arrow2Array>> {
-pub fn filter_array<A: Arrow2Array + Clone>(array: &A, filter: &ArrowBooleanArray) -> A {
+pub fn filter_array<A: Arrow2Array + Clone>(array: &A, filter: &Arrow2BooleanArray) -> A {
     assert_eq!(
         array.len(),
         filter.len(),
         "the length of the filter must match the length of the array (the underlying kernel will panic otherwise)",
@@ -392,7 +395,7 @@ pub fn filter_array(
 // For internal stuff, we could perhaps provide a custom implementation that returns a `DictionaryArray` instead?
 pub fn take_array<A: Arrow2Array + Clone, O: arrow2::types::Index>(
     array: &A,
-    indices: &ArrowPrimitiveArray<O>,
+    indices: &Arrow2PrimitiveArray<O>,
 ) -> A {
     debug_assert!(
         indices.validity().is_none(),
@@ -435,7 +438,7 @@ pub fn take_array(

 // ---

-use arrow2::{chunk::Chunk as ArrowChunk, datatypes::Schema as ArrowSchema};
+use arrow2::{chunk::Chunk as Arrow2Chunk, datatypes::Schema as Arrow2Schema};

 /// Concatenate multiple [`TransportChunk`]s into one.
 ///
@@ -443,7 +446,7 @@ use arrow2::{chunk::Chunk as Arrow2Chunk, datatypes::Schema as Arrow2Schema};
 /// * `arrow2` doesn't have a `RecordBatch` type, therefore we emulate that using our `TransportChunk`s.
 /// * `arrow-rs` does have one, and it natively supports concatenation.
 pub fn concatenate_record_batches(
-    schema: ArrowSchema,
+    schema: Arrow2Schema,
     batches: &[TransportChunk],
 ) -> anyhow::Result<TransportChunk> {
     assert!(batches.iter().map(|batch| &batch.schema).all_equal());
@@ -464,6 +467,6 @@ pub fn concatenate_record_batches(

     Ok(TransportChunk {
         schema,
-        data: ArrowChunk::new(arrays),
+        data: Arrow2Chunk::new(arrays),
     })
 }
diff --git a/crates/store/re_chunk/tests/latest_at.rs b/crates/store/re_chunk/tests/latest_at.rs
index 96ebe4c7bba0..5525a6fd8116 100644
--- a/crates/store/re_chunk/tests/latest_at.rs
+++ b/crates/store/re_chunk/tests/latest_at.rs
@@ -1,4 +1,4 @@
-use arrow2::datatypes::DataType as ArrowDatatype;
+use arrow2::datatypes::DataType as Arrow2Datatype;
 use nohash_hasher::IntMap;

 use re_chunk::{Chunk, ComponentName, LatestAtQuery, RowId, TimePoint, Timeline};
@@ -9,7 +9,7 @@ use re_types_core::{Component, Loggable};

 const ENTITY_PATH: &str = "my/entity";

-fn datatypes() -> IntMap<ComponentName, ArrowDatatype> {
+fn datatypes() -> IntMap<ComponentName, Arrow2Datatype> {
     [
         (MyPoint::name(), MyPoint::arrow2_datatype()),
         (MyColor::name(), MyColor::arrow2_datatype()),
diff --git a/crates/store/re_chunk/tests/memory_test.rs b/crates/store/re_chunk/tests/memory_test.rs
index 227db18de96b..a6c0e7c6df7c 100644
--- a/crates/store/re_chunk/tests/memory_test.rs
+++ b/crates/store/re_chunk/tests/memory_test.rs
@@ -56,10 +56,10 @@ fn memory_use<R>(run: impl Fn() -> R) -> (R, usize) {

 use arrow2::{
     array::{
-        Array as Arrow2Array, BooleanArray as ArrowBooleanArray, ListArray as ArrowListArray,
-        PrimitiveArray as ArrowPrimitiveArray,
+        Array as Arrow2Array, BooleanArray as Arrow2BooleanArray, ListArray as ArrowListArray,
+        PrimitiveArray as Arrow2PrimitiveArray,
     },
-    offset::Offsets as ArrowOffsets,
+    offset::Offsets as Arrow2Offsets,
 };
 use itertools::Itertools;

@@ -79,7 +79,7 @@ fn concat_does_allocate() {
             std::iter::repeat(NUM_SCALARS as usize / 10)
                 .take(10)
                 .map(|_| {
-                    ArrowPrimitiveArray::from_vec((0..NUM_SCALARS / 10).collect_vec()).to_boxed()
+                    Arrow2PrimitiveArray::from_vec((0..NUM_SCALARS / 10).collect_vec()).to_boxed()
                 })
                 .collect_vec()
         });
@@ -117,8 +117,9 @@ fn concat_single_is_noop() {
        ((unconcatenated, unconcatenated_size_bytes), (concatenated, concatenated_size_bytes)),
        total_size_bytes,
    ) = memory_use(|| {
-        let unconcatenated =
-            memory_use(|| ArrowPrimitiveArray::from_vec((0..NUM_SCALARS).collect_vec()).to_boxed());
+        let unconcatenated = memory_use(|| {
+            Arrow2PrimitiveArray::from_vec((0..NUM_SCALARS).collect_vec()).to_boxed()
+        });
         let concatenated =
             memory_use(|| re_chunk::util::concat_arrays(&[&*unconcatenated.0]).unwrap());

@@ -140,11 +141,11 @@ fn concat_single_is_noop() {
     {
         let unconcatenated = unconcatenated
             .as_any()
-            .downcast_ref::<ArrowPrimitiveArray<i64>>()
+            .downcast_ref::<Arrow2PrimitiveArray<i64>>()
             .unwrap();
         let concatenated = concatenated
             .as_any()
-            .downcast_ref::<ArrowPrimitiveArray<i64>>()
+            .downcast_ref::<Arrow2PrimitiveArray<i64>>()
             .unwrap();

         assert!(
@@ -168,10 +169,10 @@ fn filter_does_allocate() {
    let (((unfiltered, unfiltered_size_bytes), (filtered, filtered_size_bytes)), total_size_bytes) =
         memory_use(|| {
             let unfiltered = memory_use(|| {
-                let scalars = ArrowPrimitiveArray::from_vec((0..NUM_SCALARS).collect_vec());
+                let scalars = Arrow2PrimitiveArray::from_vec((0..NUM_SCALARS).collect_vec());
                 ArrowListArray::<i32>::new(
                     ArrowListArray::<i32>::default_datatype(scalars.data_type().clone()),
-                    ArrowOffsets::try_from_lengths(
+                    Arrow2Offsets::try_from_lengths(
                         std::iter::repeat(NUM_SCALARS as usize / 10).take(10),
                     )
                     .unwrap()
@@ -181,7 +182,7 @@ fn filter_does_allocate() {
                 )
             });

-            let filter = ArrowBooleanArray::from_slice(
+            let filter = Arrow2BooleanArray::from_slice(
                 (0..unfiltered.0.len()).map(|i| i % 2 == 0).collect_vec(),
             );
             let filtered = memory_use(|| re_chunk::util::filter_array(&unfiltered.0, &filter));
@@ -203,12 +204,12 @@ fn filter_does_allocate() {
         let unfiltered = unfiltered
             .values()
             .as_any()
-            .downcast_ref::<ArrowPrimitiveArray<i64>>()
+            .downcast_ref::<Arrow2PrimitiveArray<i64>>()
             .unwrap();
         let filtered = filtered
             .values()
             .as_any()
-            .downcast_ref::<ArrowPrimitiveArray<i64>>()
+            .downcast_ref::<Arrow2PrimitiveArray<i64>>()
             .unwrap();

         assert!(
@@ -230,10 +231,10 @@ fn filter_empty_or_full_is_noop() {
     let (((unfiltered, unfiltered_size_bytes), (filtered, filtered_size_bytes)), total_size_bytes) =
         memory_use(|| {
             let unfiltered = memory_use(|| {
-                let scalars = ArrowPrimitiveArray::from_vec((0..NUM_SCALARS).collect_vec());
+                let scalars = Arrow2PrimitiveArray::from_vec((0..NUM_SCALARS).collect_vec());
                 ArrowListArray::<i32>::new(
                     ArrowListArray::<i32>::default_datatype(scalars.data_type().clone()),
-                    ArrowOffsets::try_from_lengths(
+                    Arrow2Offsets::try_from_lengths(
                         std::iter::repeat(NUM_SCALARS as usize / 10).take(10),
                     )
                     .unwrap()
@@ -243,7 +244,7 @@ fn filter_empty_or_full_is_noop() {
                 )
             });

-            let filter = ArrowBooleanArray::from_slice(
+            let filter = Arrow2BooleanArray::from_slice(
                 std::iter::repeat(true)
                     .take(unfiltered.0.len())
                     .collect_vec(),
@@ -269,12 +270,12 @@ fn filter_empty_or_full_is_noop() {
         let unfiltered = unfiltered
             .values()
             .as_any()
-            .downcast_ref::<ArrowPrimitiveArray<i64>>()
+            .downcast_ref::<Arrow2PrimitiveArray<i64>>()
             .unwrap();
         let filtered = filtered
             .values()
             .as_any()
-            .downcast_ref::<ArrowPrimitiveArray<i64>>()
+            .downcast_ref::<Arrow2PrimitiveArray<i64>>()
             .unwrap();

         assert!(
@@ -301,10 +302,10 @@ fn take_does_not_allocate() {
     let (((untaken, untaken_size_bytes), (taken, taken_size_bytes)), total_size_bytes) =
         memory_use(|| {
             let untaken = memory_use(|| {
-                let scalars = ArrowPrimitiveArray::from_vec((0..NUM_SCALARS).collect_vec());
+                let scalars = Arrow2PrimitiveArray::from_vec((0..NUM_SCALARS).collect_vec());
                 ArrowListArray::<i32>::new(
                     ArrowListArray::<i32>::default_datatype(scalars.data_type().clone()),
-                    ArrowOffsets::try_from_lengths(
+                    Arrow2Offsets::try_from_lengths(
                         std::iter::repeat(NUM_SCALARS as usize / 10).take(10),
                     )
                     .unwrap()
@@ -314,7 +315,7 @@ fn take_does_not_allocate() {
                 )
             });

-            let indices = ArrowPrimitiveArray::from_vec(
+            let indices = Arrow2PrimitiveArray::from_vec(
                 (0..untaken.0.len() as i32)
                     .filter(|i| i % 2 == 0)
                     .collect_vec(),
@@ -338,12 +339,12 @@ fn take_does_not_allocate() {
         let untaken = untaken
             .values()
             .as_any()
-            .downcast_ref::<ArrowPrimitiveArray<i64>>()
+            .downcast_ref::<Arrow2PrimitiveArray<i64>>()
             .unwrap();
         let taken = taken
             .values()
             .as_any()
-            .downcast_ref::<ArrowPrimitiveArray<i64>>()
+            .downcast_ref::<Arrow2PrimitiveArray<i64>>()
             .unwrap();

         assert!(
@@ -365,10 +366,10 @@ fn take_empty_or_full_is_noop() {
     let (((untaken, untaken_size_bytes), (taken, taken_size_bytes)), total_size_bytes) =
         memory_use(|| {
             let untaken = memory_use(|| {
-                let scalars = ArrowPrimitiveArray::from_vec((0..NUM_SCALARS).collect_vec());
+                let scalars = Arrow2PrimitiveArray::from_vec((0..NUM_SCALARS).collect_vec());
                 ArrowListArray::<i32>::new(
                     ArrowListArray::<i32>::default_datatype(scalars.data_type().clone()),
-                    ArrowOffsets::try_from_lengths(
+                    Arrow2Offsets::try_from_lengths(
                         std::iter::repeat(NUM_SCALARS as usize / 10).take(10),
                     )
                     .unwrap()
@@ -378,7 +379,7 @@ fn take_empty_or_full_is_noop() {
                 )
             });

-            let indices = ArrowPrimitiveArray::from_vec((0..untaken.0.len() as i32).collect_vec());
+            let indices = Arrow2PrimitiveArray::from_vec((0..untaken.0.len() as i32).collect_vec());
             let taken = memory_use(|| re_chunk::util::take_array(&untaken.0, &indices));

             (untaken, taken)
@@ -400,12 +401,12 @@ fn take_empty_or_full_is_noop() {
         let untaken = untaken
             .values()
             .as_any()
-            .downcast_ref::<ArrowPrimitiveArray<i64>>()
+            .downcast_ref::<Arrow2PrimitiveArray<i64>>()
             .unwrap();
         let taken = taken
             .values()
             .as_any()
-            .downcast_ref::<ArrowPrimitiveArray<i64>>()
+            .downcast_ref::<Arrow2PrimitiveArray<i64>>()
             .unwrap();

         assert!(
diff --git a/crates/store/re_chunk/tests/range.rs b/crates/store/re_chunk/tests/range.rs
index c94c7a0fc92e..996a27d473c2 100644
--- a/crates/store/re_chunk/tests/range.rs
+++ b/crates/store/re_chunk/tests/range.rs
@@ -1,4 +1,4 @@
-use arrow2::datatypes::DataType as ArrowDatatype;
+use arrow2::datatypes::DataType as Arrow2Datatype;
 use nohash_hasher::IntMap;

 use re_chunk::{Chunk, ComponentName, RangeQuery, RowId, TimePoint, Timeline};
@@ -12,7 +12,7 @@ use re_types_core::{Component as _, Loggable as _};

 const ENTITY_PATH: &str = "my/entity";

-fn datatypes() -> IntMap<ComponentName, ArrowDatatype> {
+fn datatypes() -> IntMap<ComponentName, Arrow2Datatype> {
     [
         (MyPoint::name(), MyPoint::arrow2_datatype()),
         (MyColor::name(), MyColor::arrow2_datatype()),
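Note: the list-array fixture these memory tests rebuild in every case can be reproduced standalone with arrow2's public API. A minimal sketch using the post-rename aliases; the element count and list lengths here are illustrative, not taken from the tests:

use arrow2::{
    array::{Array as Arrow2Array, ListArray as ArrowListArray, PrimitiveArray as Arrow2PrimitiveArray},
    offset::Offsets as Arrow2Offsets,
};

fn example_list_array() -> ArrowListArray<i32> {
    // 100 i64 scalars, split into 10 lists of 10 entries each.
    let scalars = Arrow2PrimitiveArray::from_vec((0..100_i64).collect::<Vec<_>>());
    let offsets = Arrow2Offsets::try_from_lengths(std::iter::repeat(10).take(10))
        .unwrap(); // the lengths are known to be valid
    ArrowListArray::<i32>::new(
        ArrowListArray::<i32>::default_datatype(scalars.data_type().clone()),
        offsets.into(),
        scalars.to_boxed(),
        None, // no validity bitmap
    )
}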
diff --git a/crates/store/re_chunk_store/src/dataframe.rs b/crates/store/re_chunk_store/src/dataframe.rs
index fc63f2d5f82d..07aae3bb3963 100644
--- a/crates/store/re_chunk_store/src/dataframe.rs
+++ b/crates/store/re_chunk_store/src/dataframe.rs
@@ -4,7 +4,7 @@ use std::collections::{BTreeMap, BTreeSet};

 use arrow2::{
     array::ListArray as ArrowListArray,
-    datatypes::{DataType as ArrowDatatype, Field as ArrowField},
+    datatypes::{DataType as Arrow2Datatype, Field as Arrow2Field},
 };

 use re_chunk::TimelineName;
@@ -40,7 +40,7 @@ impl ColumnDescriptor {
     }

     #[inline]
-    pub fn datatype(&self) -> ArrowDatatype {
+    pub fn datatype(&self) -> Arrow2Datatype {
         match self {
             Self::Time(descr) => descr.datatype.clone(),
             Self::Component(descr) => descr.returned_datatype(),
@@ -48,7 +48,7 @@ impl ColumnDescriptor {
     }

     #[inline]
-    pub fn to_arrow_field(&self) -> ArrowField {
+    pub fn to_arrow_field(&self) -> Arrow2Field {
         match self {
             Self::Time(descr) => descr.to_arrow_field(),
             Self::Component(descr) => descr.to_arrow_field(),
@@ -79,7 +79,7 @@ pub struct TimeColumnDescriptor {
     pub timeline: Timeline,

     /// The Arrow datatype of the column.
-    pub datatype: ArrowDatatype,
+    pub datatype: Arrow2Datatype,
 }

 impl PartialOrd for TimeColumnDescriptor {
@@ -103,9 +103,9 @@ impl Ord for TimeColumnDescriptor {
 impl TimeColumnDescriptor {
     #[inline]
     // Time column must be nullable since static data doesn't have a time.
-    pub fn to_arrow_field(&self) -> ArrowField {
+    pub fn to_arrow_field(&self) -> Arrow2Field {
         let Self { timeline, datatype } = self;
-        ArrowField::new(
+        Arrow2Field::new(
             timeline.name().to_string(),
             datatype.clone(),
             true, /* nullable */
@@ -149,7 +149,7 @@ pub struct ComponentColumnDescriptor {
     /// This is the log-time datatype corresponding to how this data is encoded
     /// in a chunk. Currently this will always be an [`ArrowListArray`], but as
     /// we introduce mono-type optimization, this might be a native type instead.
-    pub store_datatype: ArrowDatatype,
+    pub store_datatype: Arrow2Datatype,

     /// Whether this column represents static data.
     pub is_static: bool,
@@ -289,13 +289,13 @@ impl ComponentColumnDescriptor {
     }

     #[inline]
-    pub fn returned_datatype(&self) -> ArrowDatatype {
+    pub fn returned_datatype(&self) -> Arrow2Datatype {
         self.store_datatype.clone()
     }

     #[inline]
-    pub fn to_arrow_field(&self) -> ArrowField {
-        ArrowField::new(
+    pub fn to_arrow_field(&self) -> Arrow2Field {
+        Arrow2Field::new(
             format!(
                 "{}:{}",
                 self.entity_path,
@@ -752,7 +752,7 @@ impl ChunkStore {
         let datatype = self
             .lookup_datatype(&component_name)
             .cloned()
-            .unwrap_or_else(|| ArrowDatatype::Null);
+            .unwrap_or_else(|| Arrow2Datatype::Null);

         ComponentColumnDescriptor {
             entity_path: selector.entity_path.clone(),
diff --git a/crates/store/re_chunk_store/src/store.rs b/crates/store/re_chunk_store/src/store.rs
index 10d0b47c55f4..20a34fbce2b8 100644
--- a/crates/store/re_chunk_store/src/store.rs
+++ b/crates/store/re_chunk_store/src/store.rs
@@ -2,7 +2,7 @@ use std::collections::{BTreeMap, BTreeSet};
 use std::sync::atomic::AtomicU64;
 use std::sync::Arc;

-use arrow2::datatypes::DataType as ArrowDataType;
+use arrow2::datatypes::DataType as Arrow2DataType;
 use nohash_hasher::IntMap;

 use re_chunk::{Chunk, ChunkId, RowId, TransportChunk};
@@ -407,7 +407,7 @@ pub struct ChunkStore {
     //
     // TODO(cmc): this would become fairly problematic in a world where each chunk can use a
     // different datatype for a given component.
-    pub(crate) type_registry: IntMap<ComponentName, ArrowDataType>,
+    pub(crate) type_registry: IntMap<ComponentName, Arrow2DataType>,

     pub(crate) per_column_metadata:
         BTreeMap<EntityPath, BTreeMap<ComponentName, ColumnMetadataState>>,

@@ -633,9 +633,9 @@ impl ChunkStore {
         self.chunks_per_chunk_id.len()
     }

-    /// Lookup the _latest_ arrow [`ArrowDataType`] used by a specific [`re_types_core::Component`].
+    /// Lookup the _latest_ arrow [`Arrow2DataType`] used by a specific [`re_types_core::Component`].
     #[inline]
-    pub fn lookup_datatype(&self, component_name: &ComponentName) -> Option<&ArrowDataType> {
+    pub fn lookup_datatype(&self, component_name: &ComponentName) -> Option<&Arrow2DataType> {
         self.type_registry.get(component_name)
     }

diff --git a/crates/store/re_chunk_store/src/writes.rs b/crates/store/re_chunk_store/src/writes.rs
index 2f168e09545e..1d64945306d9 100644
--- a/crates/store/re_chunk_store/src/writes.rs
+++ b/crates/store/re_chunk_store/src/writes.rs
@@ -1,7 +1,7 @@
 use std::{collections::BTreeSet, sync::Arc};

 use ahash::HashMap;
-use arrow2::array::{Array as _, ListArray as ArrowListArray};
+use arrow2::array::{Array as _, ListArray as Arrow2ListArray};
 use itertools::Itertools as _;

 use re_chunk::{Chunk, EntityPath, RowId};
@@ -339,7 +339,7 @@ impl ChunkStore {
         for (&component_name, list_array) in chunk.components() {
             self.type_registry.insert(
                 component_name,
-                ArrowListArray::<i32>::get_child_type(list_array.data_type()).clone(),
+                Arrow2ListArray::<i32>::get_child_type(list_array.data_type()).clone(),
             );

             let column_metadata_state = self
diff --git a/crates/store/re_data_loader/src/loader_archetype.rs b/crates/store/re_data_loader/src/loader_archetype.rs
index 041541823a08..bbfc57f1cc47 100644
--- a/crates/store/re_data_loader/src/loader_archetype.rs
+++ b/crates/store/re_data_loader/src/loader_archetype.rs
@@ -5,7 +5,7 @@ use re_types::components::VideoTimestamp;
 use re_types::Archetype;
 use re_types::{components::MediaType, ComponentBatch};

-use arrow2::array::PrimitiveArray as ArrowPrimitiveArray;
+use arrow2::array::PrimitiveArray as Arrow2PrimitiveArray;
 use arrow2::Either;

 use crate::{DataLoader, DataLoaderError, LoadedData};
@@ -193,7 +193,7 @@ fn load_video(
         Ok(frame_timestamps_ns) => {
             // Time column.
             let is_sorted = Some(true);
-            let time_column_times = ArrowPrimitiveArray::from_slice(&frame_timestamps_ns);
+            let time_column_times = Arrow2PrimitiveArray::from_slice(&frame_timestamps_ns);
             let time_column =
                 re_chunk::TimeColumn::new(is_sorted, video_timeline, time_column_times);
diff --git a/crates/store/re_dataframe/src/lib.rs b/crates/store/re_dataframe/src/lib.rs
index dcf92a27e4d4..40702bc590ac 100644
--- a/crates/store/re_dataframe/src/lib.rs
+++ b/crates/store/re_dataframe/src/lib.rs
@@ -7,7 +7,7 @@ pub use self::engine::{QueryEngine, RecordBatch};
 pub use self::query::QueryHandle;

 #[doc(no_inline)]
-pub use self::external::arrow2::chunk::Chunk as ArrowChunk;
+pub use self::external::arrow2::chunk::Chunk as Arrow2Chunk;
 #[doc(no_inline)]
 pub use self::external::re_chunk::{util::concatenate_record_batches, TransportChunk};
 #[doc(no_inline)]
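Note: worth spelling out the convention this patch rolls out: the `Arrow2` prefix frees the plain `Arrow` prefix for `arrow-rs` types, so both crates can be named in the same scope during the migration. A hypothetical illustration (neither import pair appears verbatim in this patch):

use arrow::datatypes::DataType as ArrowDatatype;   // arrow-rs
use arrow2::datatypes::DataType as Arrow2Datatype; // arrow2

// The two enums can now be referred to unambiguously side by side:
fn both_are_f64(a: &ArrowDatatype, b: &Arrow2Datatype) -> bool {
    matches!(a, ArrowDatatype::Float64) && matches!(b, Arrow2Datatype::Float64)
}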
diff --git a/crates/store/re_dataframe/src/query.rs b/crates/store/re_dataframe/src/query.rs
index 3e8e4ce726b9..0312ff3029d5 100644
--- a/crates/store/re_dataframe/src/query.rs
+++ b/crates/store/re_dataframe/src/query.rs
@@ -8,11 +8,11 @@ use std::{

 use arrow2::{
     array::{
-        Array as Arrow2Array, BooleanArray as ArrowBooleanArray,
-        PrimitiveArray as ArrowPrimitiveArray,
+        Array as Arrow2Array, BooleanArray as Arrow2BooleanArray,
+        PrimitiveArray as Arrow2PrimitiveArray,
     },
-    chunk::Chunk as ArrowChunk,
-    datatypes::Schema as ArrowSchema,
+    chunk::Chunk as Arrow2Chunk,
+    datatypes::Schema as Arrow2Schema,
     Either,
 };
 use itertools::Itertools;
@@ -104,7 +104,7 @@ struct QueryHandleState {
     /// The Arrow schema that corresponds to the `selected_contents`.
     ///
     /// All returned rows will have this schema.
-    arrow_schema: ArrowSchema,
+    arrow_schema: Arrow2Schema,

     /// All the [`Chunk`]s included in the view contents.
     ///
@@ -185,7 +185,7 @@ impl QueryHandle {
         // 3. Compute the Arrow schema of the selected components.
         //
         // Every result returned using this `QueryHandle` will match this schema exactly.
-        let arrow_schema = ArrowSchema {
+        let arrow_schema = Arrow2Schema {
             fields: selected_contents
                 .iter()
                 .map(|(_, descr)| descr.to_arrow_field())
@@ -518,9 +518,9 @@ impl QueryHandle {
                     let values = list_array
                         .values()
                         .as_any()
-                        .downcast_ref::<ArrowPrimitiveArray<i64>>()?;
+                        .downcast_ref::<Arrow2PrimitiveArray<i64>>()?;

-                    let indices = ArrowPrimitiveArray::from_vec(
+                    let indices = Arrow2PrimitiveArray::from_vec(
                         values
                             .iter()
                             .enumerate()
@@ -668,7 +668,7 @@ impl QueryHandle {
     ///
     /// Columns that do not yield any data will still be present in the results, filled with null values.
     #[inline]
-    pub fn schema(&self) -> &ArrowSchema {
+    pub fn schema(&self) -> &Arrow2Schema {
         &self.init().arrow_schema
     }

@@ -1137,7 +1137,7 @@ impl QueryHandle {
                     state.filtered_index,
                     (
                         *cur_index_value,
-                        ArrowPrimitiveArray::<i64>::from_vec(vec![cur_index_value.as_i64()])
+                        Arrow2PrimitiveArray::<i64>::from_vec(vec![cur_index_value.as_i64()])
                             .to(state.filtered_index.datatype())
                             .to_boxed(),
                     ),
@@ -1227,7 +1227,7 @@ impl QueryHandle {
     pub fn next_row_batch(&self) -> Option<RecordBatch> {
         Some(RecordBatch {
             schema: self.schema().clone(),
-            data: ArrowChunk::new(self.next_row()?),
+            data: Arrow2Chunk::new(self.next_row()?),
         })
     }

@@ -1245,7 +1245,7 @@ impl QueryHandle {

         Some(RecordBatch {
             schema,
-            data: ArrowChunk::new(row),
+            data: Arrow2Chunk::new(row),
         })
     }
 }
diff --git a/crates/store/re_protos/src/codec.rs b/crates/store/re_protos/src/codec.rs
index 9503246efc10..375722bfac8d 100644
--- a/crates/store/re_protos/src/codec.rs
+++ b/crates/store/re_protos/src/codec.rs
@@ -191,11 +191,11 @@ impl RecordingMetadata {
             .position(|field| field.name == "id")
             .ok_or_else(|| CodecError::InvalidArgument("missing id field in schema".to_owned()))?;

-        use arrow2::array::Utf8Array as ArrowUtf8Array;
+        use arrow2::array::Utf8Array as Arrow2Utf8Array;

         let id = metadata.data.columns()[id_pos]
             .as_any()
-            .downcast_ref::<ArrowUtf8Array<i32>>()
+            .downcast_ref::<Arrow2Utf8Array<i32>>()
             .ok_or_else(|| {
                 CodecError::InvalidArgument(format!(
                     "Unexpected type for id with position {id_pos} in schema: {:?}",
@@ -253,11 +253,10 @@ fn read_arrow_from_bytes(

 #[cfg(test)]
 mod tests {
-
-    use arrow2::array::Utf8Array as ArrowUtf8Array;
+    use arrow2::array::Utf8Array as Arrow2Utf8Array;
     use arrow2::chunk::Chunk as Arrow2Chunk;
     use arrow2::{
-        array::Int32Array as ArrowInt32Array, datatypes::Field as ArrowField,
+        array::Int32Array as Arrow2Int32Array, datatypes::Field as Arrow2Field,
         datatypes::Schema as Arrow2Schema,
     };
     use re_dataframe::external::re_chunk::{Chunk, RowId};
@@ -361,12 +360,12 @@ mod tests {
     #[test]
     fn test_recording_metadata_serialization() {
         let expected_schema = Arrow2Schema::from(vec![
-            ArrowField::new("id", arrow2::datatypes::DataType::Utf8, false),
-            ArrowField::new("my_int", arrow2::datatypes::DataType::Int32, false),
+            Arrow2Field::new("id", arrow2::datatypes::DataType::Utf8, false),
+            Arrow2Field::new("my_int", arrow2::datatypes::DataType::Int32, false),
         ]);

-        let id = ArrowUtf8Array::<i32>::from_slice(["some_id"]);
-        let my_ints = ArrowInt32Array::from_slice([42]);
+        let id = Arrow2Utf8Array::<i32>::from_slice(["some_id"]);
+        let my_ints = Arrow2Int32Array::from_slice([42]);
         let expected_chunk = Arrow2Chunk::new(vec![Box::new(id) as _, Box::new(my_ints) as _]);

         let metadata_tc = TransportChunk {
             schema: expected_schema.clone(),
@@ -387,13 +386,13 @@ mod tests {
     #[test]
     fn test_recording_metadata_fails_with_non_unit_batch() {
-        let expected_schema = Arrow2Schema::from(vec![ArrowField::new(
+        let expected_schema = Arrow2Schema::from(vec![Arrow2Field::new(
             "my_int",
             arrow2::datatypes::DataType::Int32,
             false,
         )]);
         // more than 1 row in the batch
-        let my_ints = ArrowInt32Array::from_slice([41, 42]);
+        let my_ints = Arrow2Int32Array::from_slice([41, 42]);

         let expected_chunk = Arrow2Chunk::new(vec![Box::new(my_ints) as _]);

         let metadata_tc = TransportChunk {
diff --git a/crates/store/re_query/examples/latest_at.rs b/crates/store/re_query/examples/latest_at.rs
index e096db0fb8af..362e9481ce7c 100644
--- a/crates/store/re_query/examples/latest_at.rs
+++ b/crates/store/re_query/examples/latest_at.rs
@@ -1,7 +1,7 @@
 use std::sync::Arc;

 use anyhow::Context;
-use arrow2::array::PrimitiveArray as ArrowPrimitiveArray;
+use arrow2::array::PrimitiveArray as Arrow2PrimitiveArray;
 use itertools::Itertools;

 use re_chunk::{Chunk, RowId};
@@ -78,7 +78,7 @@ fn main() -> anyhow::Result<()> {
             .context("invalid")?;
         let colors = colors
             .as_any()
-            .downcast_ref::<ArrowPrimitiveArray<u32>>()
+            .downcast_ref::<Arrow2PrimitiveArray<u32>>()
             .context("invalid")?;
         let colors = colors
             .values()
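Note: the downcast in this example is the usual way to read typed values back out of an arrow2 `dyn Array`; a self-contained sketch (the `u32` payload mirrors the color column above, the values are made up):

use arrow2::array::{Array as Arrow2Array, PrimitiveArray as Arrow2PrimitiveArray};

fn first_u32(array: &dyn Arrow2Array) -> Option<u32> {
    // `as_any` + `downcast_ref` recovers the concrete array type, or returns `None` on mismatch.
    let typed = array.as_any().downcast_ref::<Arrow2PrimitiveArray<u32>>()?;
    typed.values().first().copied()
}

// let colors = Arrow2PrimitiveArray::<u32>::from_vec(vec![0xff0000ff, 0x00ff00ff]);
// assert_eq!(first_u32(&colors), Some(0xff0000ff));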
diff --git a/crates/store/re_types_core/src/loggable_batch.rs b/crates/store/re_types_core/src/loggable_batch.rs
index 864ab4f2c06b..43a0c24ea395 100644
--- a/crates/store/re_types_core/src/loggable_batch.rs
+++ b/crates/store/re_types_core/src/loggable_batch.rs
@@ -1,6 +1,6 @@
 use crate::{Component, ComponentName, Loggable, SerializationResult};

-use arrow2::array::ListArray as ArrowListArray;
+use arrow2::array::ListArray as Arrow2ListArray;

 #[allow(unused_imports)] // used in docstrings
 use crate::Archetype;
@@ -31,12 +31,12 @@ pub trait ComponentBatch: LoggableBatch {
     fn name(&self) -> ComponentName;

     /// Serializes the batch into an Arrow list array with a single component per list.
-    fn to_arrow_list_array(&self) -> SerializationResult<ArrowListArray<i32>> {
+    fn to_arrow_list_array(&self) -> SerializationResult<Arrow2ListArray<i32>> {
         let array = self.to_arrow2()?;
         let offsets =
             arrow2::offset::Offsets::try_from_lengths(std::iter::repeat(1).take(array.len()))?;
-        let data_type = ArrowListArray::<i32>::default_datatype(array.data_type().clone());
-        ArrowListArray::<i32>::try_new(data_type, offsets.into(), array.to_boxed(), None)
+        let data_type = Arrow2ListArray::<i32>::default_datatype(array.data_type().clone());
+        Arrow2ListArray::<i32>::try_new(data_type, offsets.into(), array.to_boxed(), None)
             .map_err(|err| err.into())
     }
 }
diff --git a/crates/top/re_sdk/src/recording_stream.rs b/crates/top/re_sdk/src/recording_stream.rs
index 131a2dca6941..7ada4aec7bf7 100644
--- a/crates/top/re_sdk/src/recording_stream.rs
+++ b/crates/top/re_sdk/src/recording_stream.rs
@@ -9,7 +9,7 @@ use crossbeam::channel::{Receiver, Sender};
 use itertools::Either;
 use parking_lot::Mutex;

-use arrow2::array::{ListArray as ArrowListArray, PrimitiveArray as ArrowPrimitiveArray};
+use arrow2::array::{ListArray as ArrowListArray, PrimitiveArray as Arrow2PrimitiveArray};

 use re_chunk::{Chunk, ChunkBatcher, ChunkBatcherConfig, ChunkBatcherError, PendingRow, RowId};
 use re_chunk::{ChunkError, ChunkId, ComponentName, TimeColumn};
@@ -1541,7 +1541,7 @@ impl RecordingStream {
             let time_timeline = Timeline::log_time();
             let time = TimeInt::new_temporal(Time::now().nanos_since_epoch());

-            let repeated_time = ArrowPrimitiveArray::<i64>::from_values(
+            let repeated_time = Arrow2PrimitiveArray::<i64>::from_values(
                 std::iter::repeat(time.as_i64()).take(chunk.num_rows()),
             )
             .to(time_timeline.datatype());
@@ -1565,7 +1565,7 @@ impl RecordingStream {
                 .tick
                 .fetch_add(1, std::sync::atomic::Ordering::Relaxed);

-            let repeated_tick = ArrowPrimitiveArray::<i64>::from_values(
+            let repeated_tick = Arrow2PrimitiveArray::<i64>::from_values(
                 std::iter::repeat(tick).take(chunk.num_rows()),
             )
             .to(tick_timeline.datatype());
diff --git a/crates/top/rerun/src/commands/rrd/filter.rs b/crates/top/rerun/src/commands/rrd/filter.rs
index fdc2086729f1..53efe7cf9d4e 100644
--- a/crates/top/rerun/src/commands/rrd/filter.rs
+++ b/crates/top/rerun/src/commands/rrd/filter.rs
@@ -186,7 +186,7 @@ impl FilterCommand {
 // ---

 use re_sdk::{
-    external::arrow2::{datatypes::Field as ArrowField, datatypes::Schema as ArrowSchema},
+    external::arrow2::{datatypes::Field as ArrowField, datatypes::Schema as Arrow2Schema},
     EntityPath,
 };

@@ -204,7 +204,7 @@ fn should_keep_timeline(dropped_timelines: &HashSet<&String>, field: &ArrowField

 fn should_keep_entity_path(
     dropped_entity_paths: &HashSet<EntityPath>,
-    schema: &ArrowSchema,
+    schema: &Arrow2Schema,
 ) -> bool {
     let Some(entity_path) = schema
         .metadata
diff --git a/crates/top/rerun_c/src/arrow_utils.rs b/crates/top/rerun_c/src/arrow_utils.rs
index 660a2ba21387..cd33891e8aa6 100644
--- a/crates/top/rerun_c/src/arrow_utils.rs
+++ b/crates/top/rerun_c/src/arrow_utils.rs
@@ -12,7 +12,7 @@ pub unsafe fn arrow_array_from_c_ffi(
     array: &arrow2::ffi::ArrowArray,
     datatype: arrow2::datatypes::DataType,
 ) -> Result<Box<dyn arrow2::array::Array>, CError> {
-    // Arrow2 implements drop for Arrow2Array and ArrowSchema.
+    // Arrow2 implements drop for Arrow2Array and Arrow2Schema.
     //
     // Therefore, for things to work correctly we have to take ownership of the array!
     // All methods passing arrow arrays through our C interface are documented to take ownership of the component batch.
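Note: what `to_arrow_list_array` in `loggable_batch.rs` above does — wrap an N-row array into an N-row list array with one element per list — can be shown standalone. A sketch mirroring that code path (generic over any arrow2 array, error handling as in the trait method):

use arrow2::{
    array::{Array as Arrow2Array, ListArray as Arrow2ListArray},
    offset::Offsets,
};

fn wrap_one_per_list(array: &dyn Arrow2Array) -> arrow2::error::Result<Arrow2ListArray<i32>> {
    // One single-element list per row of the input array.
    let offsets = Offsets::try_from_lengths(std::iter::repeat(1).take(array.len()))?;
    let data_type = Arrow2ListArray::<i32>::default_datatype(array.data_type().clone());
    Arrow2ListArray::<i32>::try_new(data_type, offsets.into(), array.to_boxed(), None)
}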
diff --git a/crates/viewer/re_space_view_dataframe/src/display_record_batch.rs b/crates/viewer/re_space_view_dataframe/src/display_record_batch.rs
index accdd4ed4b07..e8217b0ca165 100644
--- a/crates/viewer/re_space_view_dataframe/src/display_record_batch.rs
+++ b/crates/viewer/re_space_view_dataframe/src/display_record_batch.rs
@@ -6,7 +6,7 @@ use thiserror::Error;
 use re_chunk_store::external::arrow2::{
     array::{
         Array as Arrow2Array, DictionaryArray as Arrow2DictionaryArray,
-        ListArray as Arrow2ListArray, PrimitiveArray as ArrowPrimitiveArray,
+        ListArray as Arrow2ListArray, PrimitiveArray as Arrow2PrimitiveArray,
     },
     datatypes::DataType,
     datatypes::DataType as Arrow2DataType,
@@ -167,7 +167,7 @@ impl ComponentData {
 pub(crate) enum DisplayColumn {
     Timeline {
         timeline: Timeline,
-        time_data: ArrowPrimitiveArray<i64>,
+        time_data: Arrow2PrimitiveArray<i64>,
     },
     Component {
         entity_path: EntityPath,
@@ -186,7 +186,7 @@ impl DisplayColumn {
             ColumnDescriptor::Time(desc) => {
                 let time_data = column_data
                     .as_any()
-                    .downcast_ref::<ArrowPrimitiveArray<i64>>()
+                    .downcast_ref::<Arrow2PrimitiveArray<i64>>()
                     .ok_or_else(|| {
                         DisplayRecordBatchError::UnexpectedTimeColumnDataType(
                             desc.timeline.name().as_str().to_owned(),
diff --git a/crates/viewer/re_space_view_time_series/src/line_visualizer_system.rs b/crates/viewer/re_space_view_time_series/src/line_visualizer_system.rs
index bfe8ac821844..47af47be660d 100644
--- a/crates/viewer/re_space_view_time_series/src/line_visualizer_system.rs
+++ b/crates/viewer/re_space_view_time_series/src/line_visualizer_system.rs
@@ -5,7 +5,7 @@ use re_log_types::{EntityPath, TimeInt};
 use re_space_view::range_with_blueprint_resolved_data;
 use re_types::archetypes;
 use re_types::components::{AggregationPolicy, ClearIsRecursive};
-use re_types::external::arrow2::datatypes::DataType as ArrowDatatype;
+use re_types::external::arrow2::datatypes::DataType as Arrow2Datatype;
 use re_types::{
     archetypes::SeriesLine,
     components::{Color, Name, Scalar, StrokeWidth},
@@ -240,7 +240,7 @@ impl SeriesLineSystem {
                     chunk.iter_component_indices(&query.timeline(), &Scalar::name())
                 })
                 .map(|(data_time, _)| {
-                    debug_assert_eq!(Scalar::arrow2_datatype(), ArrowDatatype::Float64);
+                    debug_assert_eq!(Scalar::arrow2_datatype(), Arrow2Datatype::Float64);

                     PlotPoint {
                         time: data_time.as_i64(),
@@ -254,7 +254,7 @@ impl SeriesLineSystem {
         {
             re_tracing::profile_scope!("fill values");

-            debug_assert_eq!(Scalar::arrow2_datatype(), ArrowDatatype::Float64);
+            debug_assert_eq!(Scalar::arrow2_datatype(), Arrow2Datatype::Float64);
             all_scalar_chunks
                 .iter()
                 .flat_map(|chunk| chunk.iter_primitive::<f64>(&Scalar::name()))
@@ -278,7 +278,7 @@ impl SeriesLineSystem {
         {
             re_tracing::profile_scope!("fill colors");

-            debug_assert_eq!(Color::arrow2_datatype(), ArrowDatatype::UInt32);
+            debug_assert_eq!(Color::arrow2_datatype(), Arrow2Datatype::UInt32);

             fn map_raw_color(raw: &[u32]) -> Option<re_renderer::Color32> {
                 raw.first().map(|c| {
@@ -331,7 +331,7 @@ impl SeriesLineSystem {
         {
             re_tracing::profile_scope!("fill stroke widths");

-            debug_assert_eq!(StrokeWidth::arrow2_datatype(), ArrowDatatype::Float32);
+            debug_assert_eq!(StrokeWidth::arrow2_datatype(), Arrow2Datatype::Float32);

             {
                 let all_stroke_width_chunks = results.get_optional_chunks(&StrokeWidth::name());
diff --git a/crates/viewer/re_space_view_time_series/src/point_visualizer_system.rs b/crates/viewer/re_space_view_time_series/src/point_visualizer_system.rs
index 960c7596e6f0..c0a9f2aedf78 100644
--- a/crates/viewer/re_space_view_time_series/src/point_visualizer_system.rs
+++ b/crates/viewer/re_space_view_time_series/src/point_visualizer_system.rs
@@ -4,7 +4,7 @@ use re_space_view::range_with_blueprint_resolved_data;
 use re_types::{
     archetypes::{self, SeriesPoint},
     components::{Color, MarkerShape, MarkerSize, Name, Scalar},
-    external::arrow2::datatypes::DataType as ArrowDatatype,
+    external::arrow2::datatypes::DataType as Arrow2Datatype,
     Archetype as _, Component as _, Loggable as _,
 };
 use re_viewer_context::{
@@ -254,7 +254,7 @@ impl SeriesPointSystem {
                     chunk.iter_component_indices(&query.timeline(), &Scalar::name())
                 })
                 .map(|(data_time, _)| {
-                    debug_assert_eq!(Scalar::arrow2_datatype(), ArrowDatatype::Float64);
+                    debug_assert_eq!(Scalar::arrow2_datatype(), Arrow2Datatype::Float64);

                     PlotPoint {
                         time: data_time.as_i64(),
@@ -268,7 +268,7 @@ impl SeriesPointSystem {
             {
                 re_tracing::profile_scope!("fill values");

-                debug_assert_eq!(Scalar::arrow2_datatype(), ArrowDatatype::Float64);
+                debug_assert_eq!(Scalar::arrow2_datatype(), Arrow2Datatype::Float64);
                 let mut i = 0;
                 all_scalar_chunks
                     .iter()
@@ -294,7 +294,7 @@ impl SeriesPointSystem {
             {
                 re_tracing::profile_scope!("fill colors");

-                debug_assert_eq!(Color::arrow2_datatype(), ArrowDatatype::UInt32);
+                debug_assert_eq!(Color::arrow2_datatype(), Arrow2Datatype::UInt32);

                 fn map_raw_color(raw: &[u32]) -> Option<re_renderer::Color32> {
                     raw.first().map(|c| {
@@ -348,7 +348,7 @@ impl SeriesPointSystem {
             {
                 re_tracing::profile_scope!("fill marker sizes");

-                debug_assert_eq!(MarkerSize::arrow2_datatype(), ArrowDatatype::Float32);
+                debug_assert_eq!(MarkerSize::arrow2_datatype(), Arrow2Datatype::Float32);

                 {
                     let all_marker_size_chunks = results.get_optional_chunks(&MarkerSize::name());
diff --git a/rerun_py/src/python_bridge.rs b/rerun_py/src/python_bridge.rs
index 2d2e561e1429..bae0810496c6 100644
--- a/rerun_py/src/python_bridge.rs
+++ b/rerun_py/src/python_bridge.rs
@@ -1136,7 +1136,7 @@ fn log_arrow_msg(
 /// ------
 /// entity_path: `str`
 ///     The entity path to log the chunk to.
-/// timelines: `Dict[str, ArrowPrimitiveArray]`
+/// timelines: `Dict[str, Arrow2PrimitiveArray]`
 ///     A dictionary mapping timeline names to their values.
 /// components: `Dict[str, ArrowListArray]`
 ///     A dictionary mapping component names to their values.
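Note: as a closing reference, the per-row time columns built in `recording_stream.rs` above all follow the same shape; a minimal sketch with the renamed alias (row count and timestamp are illustrative):

use arrow2::array::PrimitiveArray as Arrow2PrimitiveArray;
use re_chunk::{TimeColumn, Timeline};

fn repeated_time_column(num_rows: usize, time_ns: i64) -> TimeColumn {
    let timeline = Timeline::log_time();
    // One identical timestamp per row, cast to the timeline's logical datatype.
    let times = Arrow2PrimitiveArray::<i64>::from_values(
        std::iter::repeat(time_ns).take(num_rows),
    )
    .to(timeline.datatype());
    TimeColumn::new(Some(true) /* sorted */, timeline, times)
}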