Skip to content

Commit

Permalink
Rust API: be explicit about when we're using the arrow2 crate
Browse files (browse the repository at this point in the history)
This is part of migrating away from arrow2.
  • Loading branch information
emilk committed Nov 21, 2024
1 parent 193e1bb commit 488f894
Show file tree
Hide file tree
Showing 276 changed files with 2,002 additions and 1,986 deletions.
38 changes: 19 additions & 19 deletions crates/build/re_types_builder/src/codegen/rust/api.rs
Original file line number Diff line number Diff line change
Expand Up @@ -864,24 +864,24 @@ fn quote_trait_impls_for_datatype_or_component(
let quoted_arrow_datatype = if let Some(forwarded_type) = forwarded_type.as_ref() {
quote! {
#[inline]
fn arrow_datatype() -> arrow2::datatypes::DataType {
#forwarded_type::arrow_datatype()
fn arrow2_datatype() -> arrow2::datatypes::DataType {
#forwarded_type::arrow2_datatype()
}
}
} else {
let datatype = ArrowDataTypeTokenizer(&datatype, false);
quote! {
#[inline]
fn arrow_datatype() -> arrow2::datatypes::DataType {
fn arrow2_datatype() -> arrow2::datatypes::DataType {
#![allow(clippy::wildcard_imports)]
use arrow2::datatypes::*;
#datatype
}
}
};

let quoted_from_arrow = if optimize_for_buffer_slice {
let from_arrow_body = if let Some(forwarded_type) = forwarded_type.as_ref() {
let quoted_from_arrow2 = if optimize_for_buffer_slice {
let from_arrow2_body = if let Some(forwarded_type) = forwarded_type.as_ref() {
let is_pod = obj
.try_get_attr::<String>(ATTR_RUST_DERIVE)
.map_or(false, |d| d.contains("bytemuck::Pod"))
Expand All @@ -890,11 +890,11 @@ fn quote_trait_impls_for_datatype_or_component(
.map_or(false, |d| d.contains("bytemuck::Pod"));
if is_pod {
quote! {
#forwarded_type::from_arrow(arrow_data).map(bytemuck::cast_vec)
#forwarded_type::from_arrow2(arrow_data).map(bytemuck::cast_vec)
}
} else {
quote! {
#forwarded_type::from_arrow(arrow_data).map(|v| v.into_iter().map(Self).collect())
#forwarded_type::from_arrow2(arrow_data).map(|v| v.into_iter().map(Self).collect())
}
}
} else {
Expand Down Expand Up @@ -923,13 +923,13 @@ fn quote_trait_impls_for_datatype_or_component(

quote! {
#[inline]
fn from_arrow(
fn from_arrow2(
arrow_data: &dyn arrow2::array::Array,
) -> DeserializationResult<Vec<Self>>
where
Self: Sized
{
#from_arrow_body
#from_arrow2_body
}
}
} else {
Expand All @@ -939,7 +939,7 @@ fn quote_trait_impls_for_datatype_or_component(
// Forward deserialization to existing datatype if it's transparent.
let quoted_deserializer = if let Some(forwarded_type) = forwarded_type.as_ref() {
quote! {
#forwarded_type::from_arrow_opt(arrow_data).map(|v| v.into_iter().map(|v| v.map(Self)).collect())
#forwarded_type::from_arrow2_opt(arrow_data).map(|v| v.into_iter().map(|v| v.map(Self)).collect())
}
} else {
let quoted_deserializer = quote_arrow_deserializer(arrow_registry, objects, obj);
Expand All @@ -956,13 +956,13 @@ fn quote_trait_impls_for_datatype_or_component(

let quoted_serializer = if let Some(forwarded_type) = forwarded_type.as_ref() {
quote! {
fn to_arrow_opt<'a>(
fn to_arrow2_opt<'a>(
data: impl IntoIterator<Item = Option<impl Into<::std::borrow::Cow<'a, Self>>>>,
) -> SerializationResult<Box<dyn arrow2::array::Array>>
where
Self: Clone + 'a,
{
#forwarded_type::to_arrow_opt(data.into_iter().map(|datum| {
#forwarded_type::to_arrow2_opt(data.into_iter().map(|datum| {
datum.map(|datum| match datum.into() {
::std::borrow::Cow::Borrowed(datum) => ::std::borrow::Cow::Borrowed(&datum.0),
::std::borrow::Cow::Owned(datum) => ::std::borrow::Cow::Owned(datum.0),
Expand All @@ -976,7 +976,7 @@ fn quote_trait_impls_for_datatype_or_component(

quote! {
// NOTE: Don't inline this, this gets _huge_.
fn to_arrow_opt<'a>(
fn to_arrow2_opt<'a>(
data: impl IntoIterator<Item = Option<impl Into<::std::borrow::Cow<'a, Self>>>>,
) -> SerializationResult<Box<dyn arrow2::array::Array>>
where
Expand Down Expand Up @@ -1015,7 +1015,7 @@ fn quote_trait_impls_for_datatype_or_component(
#quoted_serializer

// NOTE: Don't inline this, this gets _huge_.
fn from_arrow_opt(
fn from_arrow2_opt(
arrow_data: &dyn arrow2::array::Array,
) -> DeserializationResult<Vec<Option<Self>>>
where
Expand All @@ -1024,7 +1024,7 @@ fn quote_trait_impls_for_datatype_or_component(
#quoted_deserializer
}

#quoted_from_arrow
#quoted_from_arrow2
}

#quoted_impl_component
Expand Down Expand Up @@ -1173,7 +1173,7 @@ fn quote_trait_impls_for_archetype(obj: &Object) -> TokenStream {

quote! {
if let Some(array) = arrays_by_name.get(#field_typ_fqname_str) {
<#component>::from_arrow_opt(&**array)
<#component>::from_arrow2_opt(&**array)
.with_context(#obj_field_fqname)?
#quoted_collection
} else {
Expand All @@ -1184,7 +1184,7 @@ fn quote_trait_impls_for_archetype(obj: &Object) -> TokenStream {
quote! {
if let Some(array) = arrays_by_name.get(#field_typ_fqname_str) {
Some({
<#component>::from_arrow_opt(&**array)
<#component>::from_arrow2_opt(&**array)
.with_context(#obj_field_fqname)?
#quoted_collection
})
Expand All @@ -1199,7 +1199,7 @@ fn quote_trait_impls_for_archetype(obj: &Object) -> TokenStream {
.ok_or_else(DeserializationError::missing_data)
.with_context(#obj_field_fqname)?;

<#component>::from_arrow_opt(&**array).with_context(#obj_field_fqname)? #quoted_collection
<#component>::from_arrow2_opt(&**array).with_context(#obj_field_fqname)? #quoted_collection
}}
};

Expand Down Expand Up @@ -1269,7 +1269,7 @@ fn quote_trait_impls_for_archetype(obj: &Object) -> TokenStream {
}

#[inline]
fn from_arrow_components(
fn from_arrow2_components(
arrow_data: impl IntoIterator<Item = (
ComponentName,
Box<dyn arrow2::array::Array>,
Expand Down
2 changes: 1 addition & 1 deletion crates/build/re_types_builder/src/codegen/rust/arrow.rs
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ impl quote::ToTokens for ArrowDataTypeTokenizer<'_> {
DataType::Extension(fqname, datatype, _metadata) => {
if *recursive {
let fqname_use = quote_fqname_as_type_path(fqname);
quote!(<#fqname_use>::arrow_datatype())
quote!(<#fqname_use>::arrow2_datatype())
} else {
let datatype = ArrowDataTypeTokenizer(datatype.to_logical_type(), false);
quote!(#datatype)
Expand Down
12 changes: 6 additions & 6 deletions crates/build/re_types_builder/src/codegen/rust/deserializer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,9 @@ use crate::{
/// This short-circuits on error using the `try` (`?`) operator: the outer scope must be one that
/// returns a `Result<_, DeserializationError>`!
///
/// There is a 1:1 relationship between `quote_arrow_deserializer` and `Loggable::from_arrow_opt`:
/// There is a 1:1 relationship between `quote_arrow_deserializer` and `Loggable::from_arrow2_opt`:
/// ```ignore
/// fn from_arrow_opt(data: &dyn ::arrow2::array::Array) -> DeserializationResult<Vec<Option<Self>>> {
/// fn from_arrow2_opt(data: &dyn ::arrow2::array::Array) -> DeserializationResult<Vec<Option<Self>>> {
/// Ok(#quoted_deserializer)
/// }
/// ```
Expand Down Expand Up @@ -60,7 +60,7 @@ pub fn quote_arrow_deserializer(
let data_src = format_ident!("arrow_data");

let datatype = &arrow_registry.get(&obj.fqname);
let quoted_self_datatype = quote! { Self::arrow_datatype() };
let quoted_self_datatype = quote! { Self::arrow2_datatype() };

let obj_fqname = obj.fqname.as_str();
let is_enum = obj.is_enum();
Expand Down Expand Up @@ -494,7 +494,7 @@ enum InnerRepr {
///
/// The `datatype` comes from our compile-time Arrow registry, not from the runtime payload!
/// If the datatype happens to be a struct or union, this will merely inject a runtime call to
/// `Loggable::from_arrow_opt` and call it a day, preventing code bloat.
/// `Loggable::from_arrow2_opt` and call it a day, preventing code bloat.
///
/// `data_src` is the runtime identifier of the variable holding the Arrow payload (`&dyn ::arrow2::array::Array`).
/// The returned `TokenStream` always instantiates a `Vec<Option<T>>`.
Expand All @@ -517,7 +517,7 @@ fn quote_arrow_field_deserializer(
if let DataType::Extension(fqname, _, _) = datatype {
if objects.get(fqname).map_or(false, |obj| obj.is_enum()) {
let fqname_use = quote_fqname_as_type_path(fqname);
return quote!(#fqname_use::from_arrow_opt(#data_src).with_context(#obj_field_fqname)?.into_iter());
return quote!(#fqname_use::from_arrow2_opt(#data_src).with_context(#obj_field_fqname)?.into_iter());
}
}

Expand Down Expand Up @@ -848,7 +848,7 @@ fn quote_arrow_field_deserializer(
unreachable!()
};
let fqname_use = quote_fqname_as_type_path(fqname);
quote!(#fqname_use::from_arrow_opt(#data_src).with_context(#obj_field_fqname)?.into_iter())
quote!(#fqname_use::from_arrow2_opt(#data_src).with_context(#obj_field_fqname)?.into_iter())
}

_ => unimplemented!("{datatype:#?}"),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@ fn generate_component_reflection(
ComponentReflection {
docstring_md: #docstring_md,
custom_placeholder: #custom_placeholder,
datatype: #type_name::arrow_datatype(),
datatype: #type_name::arrow2_datatype(),
}
};
quoted_pairs.push(quote! { (#quoted_name, #quoted_reflection) });
Expand Down
8 changes: 4 additions & 4 deletions crates/build/re_types_builder/src/codegen/rust/serializer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ pub fn quote_arrow_serializer(
) -> TokenStream {
let datatype = &arrow_registry.get(&obj.fqname);

let quoted_datatype = quote! { Self::arrow_datatype() };
let quoted_datatype = quote! { Self::arrow2_datatype() };

let is_enum = obj.is_enum();
let is_arrow_transparent = obj.datatype.is_none();
Expand Down Expand Up @@ -51,7 +51,7 @@ pub fn quote_arrow_serializer(
let bitmap_dst = format_ident!("{quoted_data_dst}_bitmap");

// The choice of true or false for `elements_are_nullable` here is a bit confusing.
// This code-gen path forms the basis of `to_arrow_opt`, which implies that we
// This code-gen path forms the basis of `to_arrow2_opt`, which implies that we
// support nullable elements. Additionally, this MAY be used as a recursive code
// path when using an enum within a struct, and that struct within the field may
// be null, as such the elements are always handled as nullable.
Expand Down Expand Up @@ -485,7 +485,7 @@ fn quote_arrow_field_serializer(

return quote! {{
_ = #bitmap_src;
#fqname_use::to_arrow_opt(#data_src #option_wrapper)?
#fqname_use::to_arrow2_opt(#data_src #option_wrapper)?
}};
}
}
Expand Down Expand Up @@ -903,7 +903,7 @@ fn quote_arrow_field_serializer(

quote! {{
_ = #bitmap_src;
#fqname_use::to_arrow_opt(#data_src #option_wrapper)?
#fqname_use::to_arrow2_opt(#data_src #option_wrapper)?
}}
}

Expand Down
46 changes: 23 additions & 23 deletions crates/store/re_chunk/src/batcher.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1000,9 +1000,9 @@ mod tests {
let timepoint2 = TimePoint::default().with(timeline1, 43);
let timepoint3 = TimePoint::default().with(timeline1, 44);

let points1 = MyPoint::to_arrow([MyPoint::new(1.0, 2.0), MyPoint::new(3.0, 4.0)])?;
let points2 = MyPoint::to_arrow([MyPoint::new(10.0, 20.0), MyPoint::new(30.0, 40.0)])?;
let points3 = MyPoint::to_arrow([MyPoint::new(100.0, 200.0), MyPoint::new(300.0, 400.0)])?;
let points1 = MyPoint::to_arrow2([MyPoint::new(1.0, 2.0), MyPoint::new(3.0, 4.0)])?;
let points2 = MyPoint::to_arrow2([MyPoint::new(10.0, 20.0), MyPoint::new(30.0, 40.0)])?;
let points3 = MyPoint::to_arrow2([MyPoint::new(100.0, 200.0), MyPoint::new(300.0, 400.0)])?;

let components1 = [(MyPoint::name(), points1.clone())];
let components2 = [(MyPoint::name(), points2.clone())];
Expand Down Expand Up @@ -1079,9 +1079,9 @@ mod tests {

let timeless = TimePoint::default();

let points1 = MyPoint::to_arrow([MyPoint::new(1.0, 2.0), MyPoint::new(3.0, 4.0)])?;
let points2 = MyPoint::to_arrow([MyPoint::new(10.0, 20.0), MyPoint::new(30.0, 40.0)])?;
let points3 = MyPoint::to_arrow([MyPoint::new(100.0, 200.0), MyPoint::new(300.0, 400.0)])?;
let points1 = MyPoint::to_arrow2([MyPoint::new(1.0, 2.0), MyPoint::new(3.0, 4.0)])?;
let points2 = MyPoint::to_arrow2([MyPoint::new(10.0, 20.0), MyPoint::new(30.0, 40.0)])?;
let points3 = MyPoint::to_arrow2([MyPoint::new(100.0, 200.0), MyPoint::new(300.0, 400.0)])?;

let components1 = [(MyPoint::name(), points1.clone())];
let components2 = [(MyPoint::name(), points2.clone())];
Expand Down Expand Up @@ -1155,9 +1155,9 @@ mod tests {
let timepoint2 = TimePoint::default().with(timeline1, 43);
let timepoint3 = TimePoint::default().with(timeline1, 44);

let points1 = MyPoint::to_arrow([MyPoint::new(1.0, 2.0), MyPoint::new(3.0, 4.0)])?;
let points2 = MyPoint::to_arrow([MyPoint::new(10.0, 20.0), MyPoint::new(30.0, 40.0)])?;
let points3 = MyPoint::to_arrow([MyPoint::new(100.0, 200.0), MyPoint::new(300.0, 400.0)])?;
let points1 = MyPoint::to_arrow2([MyPoint::new(1.0, 2.0), MyPoint::new(3.0, 4.0)])?;
let points2 = MyPoint::to_arrow2([MyPoint::new(10.0, 20.0), MyPoint::new(30.0, 40.0)])?;
let points3 = MyPoint::to_arrow2([MyPoint::new(100.0, 200.0), MyPoint::new(300.0, 400.0)])?;

let components1 = [(MyPoint::name(), points1.clone())];
let components2 = [(MyPoint::name(), points2.clone())];
Expand Down Expand Up @@ -1271,9 +1271,9 @@ mod tests {
.with(timeline1, 44)
.with(timeline2, 1001);

let points1 = MyPoint::to_arrow([MyPoint::new(1.0, 2.0), MyPoint::new(3.0, 4.0)])?;
let points2 = MyPoint::to_arrow([MyPoint::new(10.0, 20.0), MyPoint::new(30.0, 40.0)])?;
let points3 = MyPoint::to_arrow([MyPoint::new(100.0, 200.0), MyPoint::new(300.0, 400.0)])?;
let points1 = MyPoint::to_arrow2([MyPoint::new(1.0, 2.0), MyPoint::new(3.0, 4.0)])?;
let points2 = MyPoint::to_arrow2([MyPoint::new(10.0, 20.0), MyPoint::new(30.0, 40.0)])?;
let points3 = MyPoint::to_arrow2([MyPoint::new(100.0, 200.0), MyPoint::new(300.0, 400.0)])?;

let components1 = [(MyPoint::name(), points1.clone())];
let components2 = [(MyPoint::name(), points2.clone())];
Expand Down Expand Up @@ -1391,10 +1391,10 @@ mod tests {
let timepoint2 = TimePoint::default().with(timeline1, 43);
let timepoint3 = TimePoint::default().with(timeline1, 44);

let points1 = MyPoint::to_arrow([MyPoint::new(1.0, 2.0), MyPoint::new(3.0, 4.0)])?;
let points1 = MyPoint::to_arrow2([MyPoint::new(1.0, 2.0), MyPoint::new(3.0, 4.0)])?;
let points2 =
MyPoint64::to_arrow([MyPoint64::new(10.0, 20.0), MyPoint64::new(30.0, 40.0)])?;
let points3 = MyPoint::to_arrow([MyPoint::new(100.0, 200.0), MyPoint::new(300.0, 400.0)])?;
MyPoint64::to_arrow2([MyPoint64::new(10.0, 20.0), MyPoint64::new(30.0, 40.0)])?;
let points3 = MyPoint::to_arrow2([MyPoint::new(100.0, 200.0), MyPoint::new(300.0, 400.0)])?;

let components1 = [(MyPoint::name(), points1.clone())];
let components2 = [(MyPoint::name(), points2.clone())]; // same name, different datatype
Expand Down Expand Up @@ -1516,11 +1516,11 @@ mod tests {
.with(timeline2, 1003)
.with(timeline1, 45);

let points1 = MyPoint::to_arrow([MyPoint::new(1.0, 2.0), MyPoint::new(3.0, 4.0)])?;
let points2 = MyPoint::to_arrow([MyPoint::new(10.0, 20.0), MyPoint::new(30.0, 40.0)])?;
let points3 = MyPoint::to_arrow([MyPoint::new(100.0, 200.0), MyPoint::new(300.0, 400.0)])?;
let points1 = MyPoint::to_arrow2([MyPoint::new(1.0, 2.0), MyPoint::new(3.0, 4.0)])?;
let points2 = MyPoint::to_arrow2([MyPoint::new(10.0, 20.0), MyPoint::new(30.0, 40.0)])?;
let points3 = MyPoint::to_arrow2([MyPoint::new(100.0, 200.0), MyPoint::new(300.0, 400.0)])?;
let points4 =
MyPoint::to_arrow([MyPoint::new(1000.0, 2000.0), MyPoint::new(3000.0, 4000.0)])?;
MyPoint::to_arrow2([MyPoint::new(1000.0, 2000.0), MyPoint::new(3000.0, 4000.0)])?;

let components1 = [(MyPoint::name(), points1.clone())];
let components2 = [(MyPoint::name(), points2.clone())];
Expand Down Expand Up @@ -1630,11 +1630,11 @@ mod tests {
.with(timeline2, 1003)
.with(timeline1, 45);

let points1 = MyPoint::to_arrow([MyPoint::new(1.0, 2.0), MyPoint::new(3.0, 4.0)])?;
let points2 = MyPoint::to_arrow([MyPoint::new(10.0, 20.0), MyPoint::new(30.0, 40.0)])?;
let points3 = MyPoint::to_arrow([MyPoint::new(100.0, 200.0), MyPoint::new(300.0, 400.0)])?;
let points1 = MyPoint::to_arrow2([MyPoint::new(1.0, 2.0), MyPoint::new(3.0, 4.0)])?;
let points2 = MyPoint::to_arrow2([MyPoint::new(10.0, 20.0), MyPoint::new(30.0, 40.0)])?;
let points3 = MyPoint::to_arrow2([MyPoint::new(100.0, 200.0), MyPoint::new(300.0, 400.0)])?;
let points4 =
MyPoint::to_arrow([MyPoint::new(1000.0, 2000.0), MyPoint::new(3000.0, 4000.0)])?;
MyPoint::to_arrow2([MyPoint::new(1000.0, 2000.0), MyPoint::new(3000.0, 4000.0)])?;

let components1 = [(MyPoint::name(), points1.clone())];
let components2 = [(MyPoint::name(), points2.clone())];
Expand Down
10 changes: 5 additions & 5 deletions crates/store/re_chunk/src/chunk.rs
Original file line number Diff line number Diff line change
Expand Up @@ -265,7 +265,7 @@ impl Chunk {
.collect_vec();

#[allow(clippy::unwrap_used)]
let row_ids = <RowId as Loggable>::to_arrow(&row_ids)
let row_ids = <RowId as Loggable>::to_arrow2(&row_ids)
// Unwrap: native RowIds cannot fail to serialize.
.unwrap()
.as_any()
Expand Down Expand Up @@ -295,7 +295,7 @@ impl Chunk {
.collect_vec();

#[allow(clippy::unwrap_used)]
let row_ids = <RowId as Loggable>::to_arrow(&row_ids)
let row_ids = <RowId as Loggable>::to_arrow2(&row_ids)
// Unwrap: native RowIds cannot fail to serialize.
.unwrap()
.as_any()
Expand Down Expand Up @@ -700,7 +700,7 @@ impl Chunk {
entity_path,
heap_size_bytes: Default::default(),
is_sorted: true,
row_ids: ArrowStructArray::new_empty(RowId::arrow_datatype()),
row_ids: ArrowStructArray::new_empty(RowId::arrow2_datatype()),
timelines: Default::default(),
components: Default::default(),
}
Expand Down Expand Up @@ -1264,11 +1264,11 @@ impl Chunk {

// Row IDs
{
if *row_ids.data_type().to_logical_type() != RowId::arrow_datatype() {
if *row_ids.data_type().to_logical_type() != RowId::arrow2_datatype() {
return Err(ChunkError::Malformed {
reason: format!(
"RowId data has the wrong datatype: expected {:?} but got {:?} instead",
RowId::arrow_datatype(),
RowId::arrow2_datatype(),
*row_ids.data_type(),
),
});
Expand Down
Loading

0 comments on commit 488f894

Please sign in to comment.