Skip to content

Commit

Permalink
arrow2-convert migration 5: sunset arrow2-convert (#3917)
Browse files Browse the repository at this point in the history
The end of our wonderful journey.

- `NumInstances` control column now has an actual dedicated component
type.
- `EntityPath` is now a component.
- `Into<Cow<Self>>` impls have been cleaned up to generate way less
code.
- `arrow2_convert` is fully gone.


---

`arrow2-convert` migration PR series:
- #3853 
- #3855
- #3897
- #3902
- #3917
  • Loading branch information
teh-cmc authored Oct 19, 2023
1 parent e537874 commit 26654cd
Show file tree
Hide file tree
Showing 146 changed files with 870 additions and 1,825 deletions.
77 changes: 1 addition & 76 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions crates/re_arrow_store/src/store.rs
Original file line number Diff line number Diff line change
Expand Up @@ -265,8 +265,8 @@ impl DataStore {
/// dataframes.
///
/// See [`DataStoreConfig::store_insert_ids`].
pub fn insert_id_key() -> ComponentName {
"rerun.insert_id".into()
pub fn insert_id_component_name() -> ComponentName {
"rerun.controls.InsertId".into()
}

/// Return the current `StoreGeneration`. This can be used to determine whether the
Expand Down
18 changes: 9 additions & 9 deletions crates/re_arrow_store/src/store_arrow.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,7 @@ use std::collections::BTreeMap;

use arrow2::{array::Array, chunk::Chunk, datatypes::Schema};
use nohash_hasher::IntMap;
use re_log_types::{
DataCellColumn, DataTable, DataTableResult, RowId, Timeline, COLUMN_INSERT_ID,
COLUMN_NUM_INSTANCES,
};
use re_log_types::{DataCellColumn, DataTable, DataTableResult, NumInstances, RowId, Timeline};
use re_types_core::ComponentName;

use crate::store::{IndexedBucket, IndexedBucketInner, PersistentIndexedTable};
Expand Down Expand Up @@ -93,7 +90,7 @@ fn serialize(
col_time: Option<(Timeline, &[i64])>,
col_insert_id: &[u64],
col_row_id: &[RowId],
col_num_instances: &[u32],
col_num_instances: &[NumInstances],
table: &IntMap<ComponentName, DataCellColumn>,
) -> DataTableResult<(Schema, Chunk<Box<dyn Array>>)> {
re_tracing::profile_function!();
Expand Down Expand Up @@ -128,7 +125,7 @@ fn serialize_control_columns(
col_time: Option<(Timeline, &[i64])>,
col_insert_id: &[u64],
col_row_id: &[RowId],
col_num_instances: &[u32],
col_num_instances: &[NumInstances],
) -> DataTableResult<(Schema, Vec<Box<dyn Array>>)> {
re_tracing::profile_function!();

Expand All @@ -143,8 +140,11 @@ fn serialize_control_columns(

// NOTE: Optional column, so make sure it's actually there:
if !col_insert_id.is_empty() {
let (insert_id_field, insert_id_column) =
DataTable::serialize_primitive_column(COLUMN_INSERT_ID, col_insert_id, None);
let (insert_id_field, insert_id_column) = DataTable::serialize_primitive_column(
&crate::DataStore::insert_id_component_name(),
col_insert_id,
None,
);
schema.fields.push(insert_id_field);
columns.push(insert_id_column);
}
Expand All @@ -164,7 +164,7 @@ fn serialize_control_columns(
}

let (num_instances_field, num_instances_column) =
DataTable::serialize_primitive_column(COLUMN_NUM_INSTANCES, col_num_instances, None);
DataTable::serialize_control_column(col_num_instances)?;
schema.fields.push(num_instances_field);
columns.push(num_instances_column);

Expand Down
4 changes: 2 additions & 2 deletions crates/re_arrow_store/src/store_gc.rs
Original file line number Diff line number Diff line change
Expand Up @@ -373,7 +373,7 @@ impl DataStore {
self.timeless_tables.retain(|_, table| {
// If any column is non-empty, we need to keep this table
for num in &table.col_num_instances {
if num != &0 {
if num.get() != 0 {
return true;
}
}
Expand All @@ -395,7 +395,7 @@ impl DataStore {
for bucket in table.buckets.values() {
let inner = bucket.inner.read();
for num in &inner.col_num_instances {
if num != &0 {
if num.get() != 0 {
return true;
}
}
Expand Down
4 changes: 2 additions & 2 deletions crates/re_arrow_store/src/store_polars.rs
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ impl DataStore {
let df = sort_df_columns(&df, self.config.store_insert_ids, &timelines);

let has_timeless = df.column(TIMELESS_COL).is_ok();
let insert_id_col = DataStore::insert_id_key();
let insert_id_col = DataStore::insert_id_component_name();

const ASCENDING: bool = false;
const DESCENDING: bool = true;
Expand Down Expand Up @@ -264,7 +264,7 @@ fn insert_ids_as_series(col_insert_id: &InsertIdVec) -> Series {

let insert_ids = arrow2::array::UInt64Array::from_slice(col_insert_id.as_slice());
new_infallible_series(
DataStore::insert_id_key().as_ref(),
DataStore::insert_id_component_name().as_ref(),
&insert_ids,
insert_ids.len(),
)
Expand Down
12 changes: 7 additions & 5 deletions crates/re_arrow_store/src/store_sanity.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use re_log_types::{DataCellColumn, RowId, TimeRange, COLUMN_NUM_INSTANCES, COLUMN_TIMEPOINT};
use re_log_types::{DataCellColumn, NumInstances, RowId, TimeRange};
use re_types_core::{ComponentName, Loggable, SizeBytes as _};

use crate::{DataStore, IndexedBucket, IndexedBucketInner, IndexedTable, PersistentIndexedTable};
Expand Down Expand Up @@ -184,14 +184,16 @@ impl IndexedBucket {

// All columns should be `Self::num_rows` long.
{
const COLUMN_TIMEPOINT: &str = "rerun.controls.TimePoint";

let num_rows = self.num_rows();

let column_lengths = [
(!col_insert_id.is_empty())
.then(|| (DataStore::insert_id_key(), col_insert_id.len())), //
.then(|| (DataStore::insert_id_component_name(), col_insert_id.len())), //
Some((COLUMN_TIMEPOINT.into(), col_time.len())),
Some((RowId::name(), col_row_id.len())),
Some((COLUMN_NUM_INSTANCES.into(), col_num_instances.len())),
Some((NumInstances::name(), col_num_instances.len())),
]
.into_iter()
.flatten()
Expand Down Expand Up @@ -270,9 +272,9 @@ impl PersistentIndexedTable {

let column_lengths = [
(!col_insert_id.is_empty())
.then(|| (DataStore::insert_id_key(), col_insert_id.len())), //
.then(|| (DataStore::insert_id_component_name(), col_insert_id.len())), //
Some((RowId::name(), col_row_id.len())),
Some((COLUMN_NUM_INSTANCES.into(), col_num_instances.len())),
Some((NumInstances::name(), col_num_instances.len())),
]
.into_iter()
.flatten()
Expand Down
2 changes: 1 addition & 1 deletion crates/re_arrow_store/src/store_write.rs
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,7 @@ impl DataStore {
// one… unless we've already generated one of this exact length in the past,
// in which case we can simply re-use that cell.

Some(self.generate_cluster_cell(num_instances))
Some(self.generate_cluster_cell(num_instances.into()))
};

let insert_id = self.config.store_insert_ids.then_some(self.insert_id);
Expand Down
2 changes: 1 addition & 1 deletion crates/re_arrow_store/tests/data_store.rs
Original file line number Diff line number Diff line change
Expand Up @@ -810,7 +810,7 @@ fn joint_df(cluster_key: ComponentName, rows: &[(ComponentName, &DataRow)]) -> D
let num_instances = row.num_instances();
Series::try_from((
cluster_key.as_ref(),
DataCell::from_component::<InstanceKey>(0..num_instances as u64)
DataCell::from_component::<InstanceKey>(0..num_instances.get() as u64)
.to_arrow_monolist(),
))
.unwrap()
Expand Down
2 changes: 1 addition & 1 deletion crates/re_log_types/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -50,8 +50,8 @@ arrow2 = { workspace = true, features = [
"io_print",
"compute_concatenate",
] }
arrow2_convert.workspace = true
backtrace.workspace = true
bytemuck.workspace = true
document-features.workspace = true
fixed = { version = "1.17", default-features = false, features = ["serde"] }
# `fixed` depends on `half`, so even though `half` is not directly used in this crate,
Expand Down
10 changes: 3 additions & 7 deletions crates/re_log_types/src/data_cell.rs
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,6 @@ pub type DataCellResult<T> = ::std::result::Result<T, DataCellError>;
/// ## Example
///
/// ```rust
/// # use arrow2_convert::field::ArrowField as _;
/// # use itertools::Itertools as _;
/// #
/// # use re_log_types::DataCell;
Expand All @@ -97,7 +96,7 @@ pub type DataCellResult<T> = ::std::result::Result<T, DataCellError>;
/// #
/// # assert_eq!(MyPoint::name(), cell.component_name());
/// # assert_eq!(3, cell.num_instances());
/// # assert_eq!(cell.datatype(), &MyPoint::data_type());
/// # assert_eq!(cell.datatype(), &MyPoint::arrow_datatype());
/// #
/// # assert_eq!(points, cell.to_native().as_slice());
/// ```
Expand Down Expand Up @@ -157,7 +156,6 @@ pub struct DataCellInner {
pub(crate) values: Box<dyn arrow2::array::Array>,
}

// TODO(cmc): We should be able to build a cell from non-reference types.
// TODO(#1696): We shouldn't have to specify the component name separately, this should be
// part of the metadata by using an extension.
// TODO(#1696): Check that the array is indeed a leaf / component type when building a cell from an
Expand Down Expand Up @@ -232,6 +230,7 @@ impl DataCell {
}

/// Builds a cell from an iterable of items that can be turned into a [`Component`].
#[inline]
pub fn from_component<'a, C>(values: impl IntoIterator<Item = impl Into<C>>) -> Self
where
C: Component + Clone + 'a,
Expand All @@ -241,10 +240,7 @@ impl DataCell {
}

/// Builds a cell from an iterable of items that can be turned into a [`Component`].
///
/// ⚠ Due to quirks in `arrow2-convert`, this requires consuming and collecting the passed-in
/// iterator into a vector first.
/// Prefer [`Self::from_native`] when performance matters.
#[inline]
pub fn from_component_sparse<'a, C>(
values: impl IntoIterator<Item = Option<impl Into<C>>>,
) -> Self
Expand Down
8 changes: 4 additions & 4 deletions crates/re_log_types/src/data_row.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ use smallvec::SmallVec;

use re_types_core::{AsComponents, ComponentName, SizeBytes};

use crate::{DataCell, DataCellError, DataTable, EntityPath, TableId, TimePoint};
use crate::{DataCell, DataCellError, DataTable, EntityPath, NumInstances, TableId, TimePoint};

// ---

Expand Down Expand Up @@ -249,7 +249,7 @@ pub struct DataRow {
/// - 0 instance (clear),
/// - 1 instance (splat),
/// - `num_instances` instances (standard).
pub num_instances: u32,
pub num_instances: NumInstances,

/// The actual cells (== columns, == components).
pub cells: DataCellRow,
Expand Down Expand Up @@ -344,7 +344,7 @@ impl DataRow {
row_id,
entity_path,
timepoint,
num_instances,
num_instances: num_instances.into(),
cells,
})
}
Expand Down Expand Up @@ -401,7 +401,7 @@ impl DataRow {
}

#[inline]
/// Returns this row's `num_instances` field — the expected number of component
/// instances (0 = clear, 1 = splat, N = standard; see the field docs on `DataRow`).
pub fn num_instances(&self) -> NumInstances {
self.num_instances
}
}

Expand Down
Loading

1 comment on commit 26654cd

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Performance Alert ⚠️

Possible performance regression was detected for benchmark 'Rust Benchmark'.
Benchmark result of this commit is worse than the previous benchmark result exceeding threshold 1.25.

Benchmark suite Current: 26654cd Previous: e537874 Ratio
mono_points_arrow_batched/generate_messages 4393739 ns/iter (± 51836) 3128791 ns/iter (± 14849) 1.40

This comment was automatically generated by workflow using github-action-benchmark.

Please sign in to comment.