arrow2-convert migration 5: sunset arrow2-convert #3917

Merged · 13 commits · Oct 19, 2023
77 changes: 1 addition & 76 deletions Cargo.lock

Some generated files are not rendered by default.

4 changes: 2 additions & 2 deletions crates/re_arrow_store/src/store.rs
@@ -265,8 +265,8 @@ impl DataStore {
/// dataframes.
///
/// See [`DataStoreConfig::store_insert_ids`].
-pub fn insert_id_key() -> ComponentName {
-"rerun.insert_id".into()
+pub fn insert_id_component_name() -> ComponentName {
+"rerun.controls.InsertId".into()
}

/// Return the current `StoreGeneration`. This can be used to determine whether the
18 changes: 9 additions & 9 deletions crates/re_arrow_store/src/store_arrow.rs
@@ -2,10 +2,7 @@ use std::collections::BTreeMap;

use arrow2::{array::Array, chunk::Chunk, datatypes::Schema};
use nohash_hasher::IntMap;
-use re_log_types::{
-DataCellColumn, DataTable, DataTableResult, RowId, Timeline, COLUMN_INSERT_ID,
-COLUMN_NUM_INSTANCES,
-};
+use re_log_types::{DataCellColumn, DataTable, DataTableResult, NumInstances, RowId, Timeline};
use re_types_core::ComponentName;

use crate::store::{IndexedBucket, IndexedBucketInner, PersistentIndexedTable};
@@ -93,7 +90,7 @@ fn serialize(
col_time: Option<(Timeline, &[i64])>,
col_insert_id: &[u64],
col_row_id: &[RowId],
-col_num_instances: &[u32],
+col_num_instances: &[NumInstances],
table: &IntMap<ComponentName, DataCellColumn>,
) -> DataTableResult<(Schema, Chunk<Box<dyn Array>>)> {
re_tracing::profile_function!();
@@ -128,7 +125,7 @@ fn serialize_control_columns(
col_time: Option<(Timeline, &[i64])>,
col_insert_id: &[u64],
col_row_id: &[RowId],
-col_num_instances: &[u32],
+col_num_instances: &[NumInstances],
) -> DataTableResult<(Schema, Vec<Box<dyn Array>>)> {
re_tracing::profile_function!();

@@ -143,8 +140,11 @@

// NOTE: Optional column, so make sure it's actually there:
if !col_insert_id.is_empty() {
-let (insert_id_field, insert_id_column) =
-DataTable::serialize_primitive_column(COLUMN_INSERT_ID, col_insert_id, None);
+let (insert_id_field, insert_id_column) = DataTable::serialize_primitive_column(
+&crate::DataStore::insert_id_component_name(),
+col_insert_id,
+None,
+);
schema.fields.push(insert_id_field);
columns.push(insert_id_column);
}
@@ -164,7 +164,7 @@
}

let (num_instances_field, num_instances_column) =
-DataTable::serialize_primitive_column(COLUMN_NUM_INSTANCES, col_num_instances, None);
+DataTable::serialize_control_column(col_num_instances)?;
schema.fields.push(num_instances_field);
columns.push(num_instances_column);

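For orientation on this change: `serialize_primitive_column` pairs a raw slice with an explicitly named Arrow field, whereas the new `serialize_control_column` lets the control component (here `NumInstances`) supply its own name and datatype. Below is a minimal sketch of the primitive-slice variant in plain arrow2; the field name is taken from the new `rerun.controls.InsertId` identifier, and the helper's exact shape and nullability handling are assumptions rather than the crate's real implementation.

```rust
use arrow2::{
    array::{Array, UInt64Array},
    datatypes::{DataType, Field},
};

/// Rough stand-in for serializing the insert-id control column "the primitive
/// way": a named field plus a UInt64 array built directly from the raw slice.
/// (Nullability and metadata handling are simplified assumptions.)
fn serialize_insert_id_column(col_insert_id: &[u64]) -> (Field, Box<dyn Array>) {
    let field = Field::new("rerun.controls.InsertId", DataType::UInt64, false);
    let array = UInt64Array::from_slice(col_insert_id).boxed();
    (field, array)
}
```
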
4 changes: 2 additions & 2 deletions crates/re_arrow_store/src/store_gc.rs
@@ -373,7 +373,7 @@ impl DataStore {
self.timeless_tables.retain(|_, table| {
// If any column is non-empty, we need to keep this table
for num in &table.col_num_instances {
-if num != &0 {
+if num.get() != 0 {
return true;
}
}
@@ -395,7 +395,7 @@
for bucket in table.buckets.values() {
let inner = bucket.inner.read();
for num in &inner.col_num_instances {
-if num != &0 {
+if num.get() != 0 {
return true;
}
}
4 changes: 2 additions & 2 deletions crates/re_arrow_store/src/store_polars.rs
@@ -119,7 +119,7 @@ impl DataStore {
let df = sort_df_columns(&df, self.config.store_insert_ids, &timelines);

let has_timeless = df.column(TIMELESS_COL).is_ok();
-let insert_id_col = DataStore::insert_id_key();
+let insert_id_col = DataStore::insert_id_component_name();

const ASCENDING: bool = false;
const DESCENDING: bool = true;
@@ -264,7 +264,7 @@ fn insert_ids_as_series(col_insert_id: &InsertIdVec) -> Series {

let insert_ids = arrow2::array::UInt64Array::from_slice(col_insert_id.as_slice());
new_infallible_series(
-DataStore::insert_id_key().as_ref(),
+DataStore::insert_id_component_name().as_ref(),
&insert_ids,
insert_ids.len(),
)
12 changes: 7 additions & 5 deletions crates/re_arrow_store/src/store_sanity.rs
@@ -1,4 +1,4 @@
-use re_log_types::{DataCellColumn, RowId, TimeRange, COLUMN_NUM_INSTANCES, COLUMN_TIMEPOINT};
+use re_log_types::{DataCellColumn, NumInstances, RowId, TimeRange};
use re_types_core::{ComponentName, Loggable, SizeBytes as _};

use crate::{DataStore, IndexedBucket, IndexedBucketInner, IndexedTable, PersistentIndexedTable};
@@ -184,14 +184,16 @@ impl IndexedBucket {

// All columns should be `Self::num_rows` long.
{
+const COLUMN_TIMEPOINT: &str = "rerun.controls.TimePoint";

let num_rows = self.num_rows();

let column_lengths = [
(!col_insert_id.is_empty())
-.then(|| (DataStore::insert_id_key(), col_insert_id.len())), //
+.then(|| (DataStore::insert_id_component_name(), col_insert_id.len())), //
Some((COLUMN_TIMEPOINT.into(), col_time.len())),
Some((RowId::name(), col_row_id.len())),
-Some((COLUMN_NUM_INSTANCES.into(), col_num_instances.len())),
+Some((NumInstances::name(), col_num_instances.len())),
]
.into_iter()
.flatten()
@@ -270,9 +272,9 @@ impl PersistentIndexedTable {

let column_lengths = [
(!col_insert_id.is_empty())
-.then(|| (DataStore::insert_id_key(), col_insert_id.len())), //
+.then(|| (DataStore::insert_id_component_name(), col_insert_id.len())), //
Some((RowId::name(), col_row_id.len())),
-Some((COLUMN_NUM_INSTANCES.into(), col_num_instances.len())),
+Some((NumInstances::name(), col_num_instances.len())),
]
.into_iter()
.flatten()
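
The sanity checks above all follow the same pattern: gather each column's name and length, then require that every length equals the bucket's row count. A generic sketch of that pattern follows; the plain `String` errors are a placeholder, not the crate's actual error type.

```rust
/// Generic shape of the column-length sanity check: every control/data column
/// must contain exactly `num_rows` entries, otherwise the table is inconsistent.
fn check_column_lengths<'a>(
    num_rows: usize,
    column_lengths: impl IntoIterator<Item = (&'a str, usize)>,
) -> Result<(), String> {
    for (name, len) in column_lengths {
        if len != num_rows {
            return Err(format!("column {name:?} has {len} rows, expected {num_rows}"));
        }
    }
    Ok(())
}
```
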
2 changes: 1 addition & 1 deletion crates/re_arrow_store/src/store_write.rs
@@ -163,7 +163,7 @@ impl DataStore {
// one… unless we've already generated one of this exact length in the past,
// in which case we can simply re-use that cell.

-Some(self.generate_cluster_cell(num_instances))
+Some(self.generate_cluster_cell(num_instances.into()))
};

let insert_id = self.config.store_insert_ids.then_some(self.insert_id);
2 changes: 1 addition & 1 deletion crates/re_arrow_store/tests/data_store.rs
@@ -810,7 +810,7 @@ fn joint_df(cluster_key: ComponentName, rows: &[(ComponentName, &DataRow)]) -> D
let num_instances = row.num_instances();
Series::try_from((
cluster_key.as_ref(),
-DataCell::from_component::<InstanceKey>(0..num_instances as u64)
+DataCell::from_component::<InstanceKey>(0..num_instances.get() as u64)
.to_arrow_monolist(),
))
.unwrap()
2 changes: 1 addition & 1 deletion crates/re_log_types/Cargo.toml
@@ -50,8 +50,8 @@ arrow2 = { workspace = true, features = [
"io_print",
"compute_concatenate",
] }
-arrow2_convert.workspace = true
backtrace.workspace = true
bytemuck.workspace = true
document-features.workspace = true
fixed = { version = "1.17", default-features = false, features = ["serde"] }
# `fixed` depends on `half`, so even though `half` is not directly used in this crate,
10 changes: 3 additions & 7 deletions crates/re_log_types/src/data_cell.rs
@@ -71,7 +71,6 @@ pub type DataCellResult<T> = ::std::result::Result<T, DataCellError>;
/// ## Example
///
/// ```rust
-/// # use arrow2_convert::field::ArrowField as _;
/// # use itertools::Itertools as _;
/// #
/// # use re_log_types::DataCell;
@@ -97,7 +96,7 @@ pub type DataCellResult<T> = ::std::result::Result<T, DataCellError>;
/// #
/// # assert_eq!(MyPoint::name(), cell.component_name());
/// # assert_eq!(3, cell.num_instances());
-/// # assert_eq!(cell.datatype(), &MyPoint::data_type());
+/// # assert_eq!(cell.datatype(), &MyPoint::arrow_datatype());
/// #
/// # assert_eq!(points, cell.to_native().as_slice());
/// ```
@@ -157,7 +156,6 @@ pub struct DataCellInner {
pub(crate) values: Box<dyn arrow2::array::Array>,
}

-// TODO(cmc): We should be able to build a cell from non-reference types.
// TODO(#1696): We shouldn't have to specify the component name separately, this should be
// part of the metadata by using an extension.
// TODO(#1696): Check that the array is indeed a leaf / component type when building a cell from an
@@ -232,6 +230,7 @@ impl DataCell {
}

/// Builds a cell from an iterable of items that can be turned into a [`Component`].
+#[inline]
pub fn from_component<'a, C>(values: impl IntoIterator<Item = impl Into<C>>) -> Self
where
C: Component + Clone + 'a,
@@ -241,10 +240,7 @@
}

/// Builds a cell from an iterable of items that can be turned into a [`Component`].
-///
-/// ⚠ Due to quirks in `arrow2-convert`, this requires consuming and collecting the passed-in
-/// iterator into a vector first.
-/// Prefer [`Self::from_native`] when performance matters.
+#[inline]
pub fn from_component_sparse<'a, C>(
values: impl IntoIterator<Item = Option<impl Into<C>>>,
) -> Self
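
The doc-example change above (`MyPoint::data_type()` becoming `MyPoint::arrow_datatype()`) is the migration in miniature: a component's Arrow datatype now comes from rerun's own `Loggable`-style trait in `re_types_core` instead of `arrow2_convert::field::ArrowField`. The sketch below is an illustrative stand-in for that idea only; the trait and struct definitions are simplified and are not the real `re_types_core` API.

```rust
use arrow2::datatypes::{DataType, Field};

// Illustrative stand-in for the role a `Loggable`-style trait plays after the
// migration: each component reports its own name and Arrow datatype.
trait ComponentLike {
    fn name() -> &'static str;
    fn arrow_datatype() -> DataType;
}

#[allow(dead_code)]
struct MyPoint {
    x: f32,
    y: f32,
}

impl ComponentLike for MyPoint {
    fn name() -> &'static str {
        "example.MyPoint"
    }

    fn arrow_datatype() -> DataType {
        DataType::Struct(vec![
            Field::new("x", DataType::Float32, false),
            Field::new("y", DataType::Float32, false),
        ])
    }
}
```
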
8 changes: 4 additions & 4 deletions crates/re_log_types/src/data_row.rs
@@ -4,7 +4,7 @@ use smallvec::SmallVec;

use re_types_core::{AsComponents, ComponentName, SizeBytes};

-use crate::{DataCell, DataCellError, DataTable, EntityPath, TableId, TimePoint};
+use crate::{DataCell, DataCellError, DataTable, EntityPath, NumInstances, TableId, TimePoint};

// ---

@@ -249,7 +249,7 @@ pub struct DataRow {
/// - 0 instance (clear),
/// - 1 instance (splat),
/// - `num_instances` instances (standard).
-pub num_instances: u32,
+pub num_instances: NumInstances,

/// The actual cells (== columns, == components).
pub cells: DataCellRow,
@@ -344,7 +344,7 @@ impl DataRow {
row_id,
entity_path,
timepoint,
-num_instances,
+num_instances: num_instances.into(),
cells,
})
}
@@ -401,7 +401,7 @@ impl DataRow {
}

#[inline]
-pub fn num_instances(&self) -> u32 {
+pub fn num_instances(&self) -> NumInstances {
self.num_instances
}

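
Across the PR, `num_instances` moves from a bare `u32` to a `NumInstances` wrapper. The sketch below is a hypothetical reconstruction of such a newtype, inferred only from how this diff uses it (`.into()` from a `u32`, `.get()` to read it back); the real type in `re_log_types` additionally implements rerun's component traits, which are omitted here.

```rust
/// Hypothetical sketch of a `NumInstances`-style newtype, inferred from usage
/// in this diff: constructed from a `u32` and read back with `.get()`.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct NumInstances(u32);

impl NumInstances {
    #[inline]
    pub fn get(self) -> u32 {
        self.0
    }
}

impl From<u32> for NumInstances {
    #[inline]
    fn from(num: u32) -> Self {
        Self(num)
    }
}
```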