Skip to content

Commit

Permalink
feat: StructDType from_iter (#2003)
Browse files Browse the repository at this point in the history
Added a `FromIterator` implementation for `StructDType` (`StructDType::from_iter`) and used it at the existing construction sites. This should remove some
intermediate `Vec`s.
  • Loading branch information
joseph-isaacs authored Jan 17, 2025
1 parent f8caed0 commit dd09060
Show file tree
Hide file tree
Showing 6 changed files with 56 additions and 58 deletions.
12 changes: 7 additions & 5 deletions vortex-array/src/array/struct_/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -90,17 +90,19 @@ impl StructArray {
}

pub fn from_fields<N: AsRef<str>>(items: &[(N, ArrayData)]) -> VortexResult<Self> {
let names: Vec<FieldName> = items
.iter()
.map(|(name, _)| FieldName::from(name.as_ref()))
.collect();
let names = items.iter().map(|(name, _)| FieldName::from(name.as_ref()));
let fields: Vec<ArrayData> = items.iter().map(|(_, array)| array.clone()).collect();
let len = fields
.first()
.map(|f| f.len())
.ok_or_else(|| vortex_err!("StructArray cannot be constructed from an empty slice of arrays because the length is unspecified"))?;

Self::try_new(FieldNames::from(names), fields, len, Validity::NonNullable)
Self::try_new(
FieldNames::from_iter(names),
fields,
len,
Validity::NonNullable,
)
}

// TODO(aduffy): Add equivalent function to support field masks for nested column access.
Expand Down
43 changes: 18 additions & 25 deletions vortex-array/src/arrow/dtype.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,9 @@
use std::sync::Arc;

use arrow_schema::{DataType, Field, FieldRef, Fields, Schema, SchemaBuilder, SchemaRef};
use itertools::Itertools;
use vortex_datetime_dtype::arrow::{make_arrow_temporal_dtype, make_temporal_ext_dtype};
use vortex_datetime_dtype::is_temporal_ext_type;
use vortex_dtype::{DType, Nullability, PType, StructDType};
use vortex_dtype::{DType, FieldName, Nullability, PType, StructDType};
use vortex_error::{vortex_bail, vortex_err, VortexResult};

use crate::arrow::{FromArrowType, TryFromArrowType};
Expand Down Expand Up @@ -46,23 +45,23 @@ impl TryFromArrowType<&DataType> for PType {
impl FromArrowType<SchemaRef> for DType {
fn from_arrow(value: SchemaRef) -> Self {
Self::Struct(
StructDType::new(
value
.fields()
.iter()
.map(|f| f.name().as_str().into())
.collect(),
value
.fields()
.iter()
.map(|f| Self::from_arrow(f.as_ref()))
.collect_vec(),
),
StructDType::from_arrow(value.fields()),
Nullability::NonNullable, // Must match From<RecordBatch> for Array
)
}
}

/// Converts a set of Arrow [`Fields`] into a [`StructDType`].
///
/// Every Arrow field contributes one struct field: its name becomes the
/// [`FieldName`] and its type is translated through the `&Field` conversion
/// for [`DType`].
impl FromArrowType<&Fields> for StructDType {
    fn from_arrow(value: &Fields) -> Self {
        value
            .iter()
            .map(|field| {
                let name = FieldName::from(field.name().as_str());
                let dtype = DType::from_arrow(field.as_ref());
                (name, dtype)
            })
            // Collecting goes through the `FromIterator` impl on `StructDType`.
            .collect()
    }
}

impl FromArrowType<&Field> for DType {
fn from_arrow(field: &Field) -> Self {
use vortex_dtype::DType::*;
Expand All @@ -88,13 +87,7 @@ impl FromArrowType<&Field> for DType {
DataType::List(e) | DataType::LargeList(e) => {
List(Arc::new(Self::from_arrow(e.as_ref())), nullability)
}
DataType::Struct(f) => Struct(
StructDType::new(
f.iter().map(|f| f.name().as_str().into()).collect(),
f.iter().map(|f| Self::from_arrow(f.as_ref())).collect_vec(),
),
nullability,
),
DataType::Struct(f) => Struct(StructDType::from_arrow(f), nullability),
_ => unimplemented!("Arrow data type not yet supported: {:?}", field.data_type()),
}
}
Expand Down Expand Up @@ -207,10 +200,10 @@ mod test {

assert_eq!(
infer_data_type(&DType::Struct(
StructDType::new(
FieldNames::from(vec![FieldName::from("field_a"), FieldName::from("field_b")]),
vec![DType::Bool(false.into()), DType::Utf8(true.into())],
),
StructDType::from_iter([
("field_a", DType::Bool(false.into())),
("field_b", DType::Utf8(true.into()))
]),
Nullability::NonNullable,
))
.unwrap(),
Expand Down
8 changes: 3 additions & 5 deletions vortex-dtype/src/serde/flatbuffers/project.rs
Original file line number Diff line number Diff line change
Expand Up @@ -49,16 +49,14 @@ pub fn project_and_deserialize(
.ok_or_else(|| vortex_err!("The top-level type should be a struct"))?;
let nullability = fb_struct.nullable().into();

let (names, dtypes): (Vec<Arc<str>>, Vec<DType>) = projection
let struct_dtype = projection
.iter()
.map(|f| resolve_field(fb_struct, f))
.map(|idx| idx.and_then(|i| read_field(fb_struct, i, buffer)))
.collect::<VortexResult<Vec<_>>>()?
.into_iter()
.unzip();
.collect::<VortexResult<Vec<_>>>()?;

Ok(DType::Struct(
StructDType::new(names.into(), dtypes),
StructDType::from_iter(struct_dtype),
nullability,
))
}
Expand Down
21 changes: 16 additions & 5 deletions vortex-dtype/src/struct_.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ use vortex_error::{
};

use crate::flatbuffers::ViewedDType;
use crate::{DType, Field, FieldNames};
use crate::{DType, Field, FieldName, FieldNames};

/// DType of a struct's field, either owned or a pointer to an underlying flatbuffer.
#[derive(Debug, Clone, Eq, PartialEq, PartialOrd, Hash)]
Expand Down Expand Up @@ -290,6 +290,20 @@ impl StructDType {
}
}

/// Builds a [`StructDType`] from an iterator of `(name, dtype)` pairs.
///
/// Names may be anything convertible into a [`FieldName`] and dtypes anything
/// convertible into a [`FieldDType`], so call sites can pass tuples such as
/// `("a", DType::Bool(..))` directly instead of assembling separate name and
/// dtype collections first.
impl<T, V> FromIterator<(T, V)> for StructDType
where
T: Into<FieldName>,
V: Into<FieldDType>,
{
fn from_iter<I: IntoIterator<Item = (T, V)>>(iter: I) -> Self {
// Convert each pair, then split the pairs into two parallel vectors
// (names and dtypes) in iteration order.
let (names, dtypes): (Vec<_>, Vec<_>) = iter
.into_iter()
.map(|(name, dtype)| (name.into(), dtype.into()))
.unzip();
// NOTE(review): each dtype was already passed through `.into()` above, so
// this second `map(Into::into)` pass looks like an identity conversion
// unless `from_fields` expects a different element type. Confirm against
// its signature before simplifying.
StructDType::from_fields(names.into(), dtypes.into_iter().map(Into::into).collect())
}
}

#[cfg(test)]
mod test {
use crate::dtype::DType;
Expand All @@ -316,10 +330,7 @@ mod test {
let b_type = DType::Bool(Nullability::NonNullable);

let dtype = DType::Struct(
StructDType::new(
vec!["A".into(), "B".into()].into(),
vec![a_type.clone(), b_type.clone()],
),
StructDType::from_iter([("A", a_type.clone()), ("B", b_type.clone())]),
Nullability::Nullable,
);
assert!(dtype.is_nullable());
Expand Down
19 changes: 8 additions & 11 deletions vortex-expr/src/transform/partition.rs
Original file line number Diff line number Diff line change
Expand Up @@ -358,20 +358,17 @@ mod tests {

fn dtype() -> DType {
DType::Struct(
StructDType::new(
vec!["a".into(), "b".into(), "c".into()].into(),
vec![
StructDType::from_iter([
(
"a",
DType::Struct(
StructDType::new(
vec!["a".into(), "b".into()].into(),
vec![I32.into(), I32.into()],
),
StructDType::from_iter([("a", I32.into()), ("b", DType::from(I32))]),
NonNullable,
),
I32.into(),
I32.into(),
],
),
),
("b", I32.into()),
("c", I32.into()),
]),
NonNullable,
)
}
Expand Down
11 changes: 4 additions & 7 deletions vortex-layout/src/layouts/chunked/stats_table.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,14 +42,11 @@ impl StatsTable {

/// Returns the DType of the statistics table given a set of statistics and column [`DType`].
pub fn dtype_for_stats_table(column_dtype: &DType, present_stats: &[Stat]) -> DType {
let dtypes = present_stats
.iter()
.map(|s| s.dtype(column_dtype).as_nullable())
.collect();
DType::Struct(
StructDType::new(
present_stats.iter().map(|s| s.name().into()).collect(),
dtypes,
StructDType::from_iter(
present_stats
.iter()
.map(|stat| (stat.name(), stat.dtype(column_dtype).as_nullable())),
),
Nullability::NonNullable,
)
Expand Down

0 comments on commit dd09060

Please sign in to comment.