From f38085d4c81ca3fbbdae34af600ea18c93087c2b Mon Sep 17 00:00:00 2001 From: wiedld Date: Tue, 27 Aug 2024 10:26:29 -0700 Subject: [PATCH] feat(12118): logical plan support for BinaryView --- .../substrait/src/logical_plan/consumer.rs | 2 ++ .../substrait/src/logical_plan/producer.rs | 11 +++++++++++ .../tests/cases/roundtrip_logical_plan.rs | 17 +++++++++++++++++ 3 files changed, 30 insertions(+) diff --git a/datafusion/substrait/src/logical_plan/consumer.rs b/datafusion/substrait/src/logical_plan/consumer.rs index dd24d0f8d688..980bbdded22c 100644 --- a/datafusion/substrait/src/logical_plan/consumer.rs +++ b/datafusion/substrait/src/logical_plan/consumer.rs @@ -1432,6 +1432,7 @@ fn from_substrait_type( r#type::Kind::Binary(binary) => match binary.type_variation_reference { DEFAULT_CONTAINER_TYPE_VARIATION_REF => Ok(DataType::Binary), LARGE_CONTAINER_TYPE_VARIATION_REF => Ok(DataType::LargeBinary), + VIEW_CONTAINER_TYPE_VARIATION_REF => Ok(DataType::BinaryView), v => not_impl_err!( "Unsupported Substrait type variation {v} of type {s_kind:?}" ), @@ -1770,6 +1771,7 @@ fn from_substrait_literal( LARGE_CONTAINER_TYPE_VARIATION_REF => { ScalarValue::LargeBinary(Some(b.clone())) } + VIEW_CONTAINER_TYPE_VARIATION_REF => ScalarValue::BinaryView(Some(b.clone())), others => { return substrait_err!("Unknown type variation reference {others}"); } diff --git a/datafusion/substrait/src/logical_plan/producer.rs b/datafusion/substrait/src/logical_plan/producer.rs index 2df43d4e239d..d19797656a5e 100644 --- a/datafusion/substrait/src/logical_plan/producer.rs +++ b/datafusion/substrait/src/logical_plan/producer.rs @@ -1462,6 +1462,12 @@ fn to_substrait_type( nullability, })), }), + DataType::BinaryView => Ok(substrait::proto::Type { + kind: Some(r#type::Kind::Binary(r#type::Binary { + type_variation_reference: VIEW_CONTAINER_TYPE_VARIATION_REF, + nullability, + })), + }), DataType::Utf8 => Ok(substrait::proto::Type { kind: Some(r#type::Kind::String(r#type::String { type_variation_reference: DEFAULT_CONTAINER_TYPE_VARIATION_REF, @@ -1920,6 +1926,10 @@ fn to_substrait_literal( LiteralType::Binary(b.clone()), LARGE_CONTAINER_TYPE_VARIATION_REF, ), + ScalarValue::BinaryView(Some(b)) => ( + LiteralType::Binary(b.clone()), + VIEW_CONTAINER_TYPE_VARIATION_REF, + ), ScalarValue::FixedSizeBinary(_, Some(b)) => ( LiteralType::FixedBinary(b.clone()), DEFAULT_TYPE_VARIATION_REF, @@ -2361,6 +2371,7 @@ mod test { round_trip_type(DataType::Binary)?; round_trip_type(DataType::FixedSizeBinary(10))?; round_trip_type(DataType::LargeBinary)?; + round_trip_type(DataType::BinaryView)?; round_trip_type(DataType::Utf8)?; round_trip_type(DataType::LargeUtf8)?; round_trip_type(DataType::Utf8View)?; diff --git a/datafusion/substrait/tests/cases/roundtrip_logical_plan.rs b/datafusion/substrait/tests/cases/roundtrip_logical_plan.rs index f82b72d360da..3f4d9df0de14 100644 --- a/datafusion/substrait/tests/cases/roundtrip_logical_plan.rs +++ b/datafusion/substrait/tests/cases/roundtrip_logical_plan.rs @@ -716,6 +716,7 @@ async fn all_type_literal() -> Result<()> { date32_col = arrow_cast('2020-01-01', 'Date32') AND binary_col = arrow_cast('binary', 'Binary') AND large_binary_col = arrow_cast('large_binary', 'LargeBinary') AND + view_binary_col = arrow_cast(arrow_cast('binary_view', 'Binary'), 'BinaryView') AND utf8_col = arrow_cast('utf8', 'Utf8') AND large_utf8_col = arrow_cast('large_utf8', 'LargeUtf8') AND view_utf8_col = arrow_cast('utf8_view', 'Utf8View');", @@ -723,6 +724,21 @@ async fn all_type_literal() -> Result<()> { .await } +/// Arrow-cast does not currently handle direct casting from utf8 to binaryView. +#[tokio::test] +async fn binaryview_type_literal_needs_casting_fix() -> Result<()> { + let err = roundtrip_all_types( + "select * from data where + view_binary_col = arrow_cast('binary_view', 'BinaryView');", + ) + .await; + + assert!( + matches!(err, Err(e) if e.to_string().contains("Unsupported CAST from Utf8 to BinaryView")) + ); + Ok(()) +} + #[tokio::test] async fn roundtrip_literal_list() -> Result<()> { roundtrip("SELECT [[1,2,3], [], NULL, [NULL]] FROM data").await @@ -1232,6 +1248,7 @@ async fn create_all_type_context() -> Result { Field::new("date64_col", DataType::Date64, true), Field::new("binary_col", DataType::Binary, true), Field::new("large_binary_col", DataType::LargeBinary, true), + Field::new("view_binary_col", DataType::BinaryView, true), Field::new("fixed_size_binary_col", DataType::FixedSizeBinary(42), true), Field::new("utf8_col", DataType::Utf8, true), Field::new("large_utf8_col", DataType::LargeUtf8, true),