-
Notifications
You must be signed in to change notification settings - Fork 1.3k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat: support unnest
multiple arrays
#10044
Changes from 11 commits
d9fd370
393c0fb
b73c884
d4a8625
5fa7894
8ced636
992d0b2
5d6c932
92a7efa
6b82040
5ad6d77
8e7af12
c20546c
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1112,7 +1112,7 @@ impl LogicalPlanBuilder { | |
|
||
/// Unnest the given column. | ||
pub fn unnest_column(self, column: impl Into<Column>) -> Result<Self> { | ||
Ok(Self::from(unnest(self.plan, column.into())?)) | ||
Ok(Self::from(unnest(self.plan, vec![column.into()])?)) | ||
} | ||
|
||
/// Unnest the given column given [`UnnestOptions`] | ||
|
@@ -1123,10 +1123,21 @@ impl LogicalPlanBuilder { | |
) -> Result<Self> { | ||
Ok(Self::from(unnest_with_options( | ||
self.plan, | ||
column.into(), | ||
vec![column.into()], | ||
options, | ||
)?)) | ||
} | ||
|
||
/// Unnest the given columns with the given [`UnnestOptions`] | ||
pub fn unnest_columns_with_options( | ||
self, | ||
columns: Vec<Column>, | ||
options: UnnestOptions, | ||
) -> Result<Self> { | ||
Ok(Self::from(unnest_with_options( | ||
self.plan, columns, options, | ||
)?)) | ||
} | ||
} | ||
pub fn change_redundant_column(fields: &Fields) -> Vec<Field> { | ||
let mut name_map = HashMap::new(); | ||
|
@@ -1534,44 +1545,50 @@ impl TableSource for LogicalTableSource { | |
} | ||
|
||
/// Create a [`LogicalPlan::Unnest`] plan | ||
pub fn unnest(input: LogicalPlan, column: Column) -> Result<LogicalPlan> { | ||
unnest_with_options(input, column, UnnestOptions::new()) | ||
pub fn unnest(input: LogicalPlan, columns: Vec<Column>) -> Result<LogicalPlan> { | ||
unnest_with_options(input, columns, UnnestOptions::new()) | ||
} | ||
|
||
/// Create a [`LogicalPlan::Unnest`] plan with options | ||
pub fn unnest_with_options( | ||
input: LogicalPlan, | ||
column: Column, | ||
columns: Vec<Column>, | ||
options: UnnestOptions, | ||
) -> Result<LogicalPlan> { | ||
let (unnest_qualifier, unnest_field) = | ||
input.schema().qualified_field_from_column(&column)?; | ||
|
||
// Extract the type of the nested field in the list. | ||
let unnested_field = match unnest_field.data_type() { | ||
DataType::List(field) | ||
| DataType::FixedSizeList(field, _) | ||
| DataType::LargeList(field) => Arc::new(Field::new( | ||
unnest_field.name(), | ||
field.data_type().clone(), | ||
unnest_field.is_nullable(), | ||
)), | ||
_ => { | ||
// If the unnest field is not a list type return the input plan. | ||
return Ok(input); | ||
} | ||
}; | ||
let mut unnested_fields: HashMap<usize, _> = HashMap::with_capacity(columns.len()); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. 💯 |
||
// Add qualifiers to the columns. | ||
let mut qualified_columns = Vec::with_capacity(columns.len()); | ||
for c in &columns { | ||
let index = input.schema().index_of_column(c)?; | ||
let (unnest_qualifier, unnest_field) = input.schema().qualified_field(index); | ||
let unnested_field = match unnest_field.data_type() { | ||
DataType::List(field) | ||
| DataType::FixedSizeList(field, _) | ||
| DataType::LargeList(field) => Arc::new(Field::new( | ||
unnest_field.name(), | ||
field.data_type().clone(), | ||
// Unnesting may produce NULLs even if the list is not null. | ||
// For example: unnest([1], []) -> 1, null
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Although the test does not cover this case, sadly I don't think there is currently a way to create a list with nullable as false 🤔 Maybe we can write a simple Rust test for this? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think so too. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
We can do it in another PR! There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
👌 |
||
true, | ||
)), | ||
_ => { | ||
// If the unnest field is not a list type return the input plan. | ||
return Ok(input); | ||
} | ||
}; | ||
qualified_columns.push(Column::from((unnest_qualifier, unnested_field.as_ref()))); | ||
unnested_fields.insert(index, unnested_field); | ||
} | ||
|
||
// Update the schema with the unnest column type changed to contain the nested type. | ||
// Update the schema with the unnest column types changed to contain the nested types. | ||
let input_schema = input.schema(); | ||
let fields = input_schema | ||
.iter() | ||
.map(|(q, f)| { | ||
if f.as_ref() == unnest_field && q == unnest_qualifier { | ||
(unnest_qualifier.cloned(), unnested_field.clone()) | ||
} else { | ||
(q.cloned(), f.clone()) | ||
} | ||
.enumerate() | ||
.map(|(index, (q, f))| match unnested_fields.get(&index) { | ||
Some(unnested_field) => (q.cloned(), unnested_field.clone()), | ||
None => (q.cloned(), f.clone()), | ||
}) | ||
.collect::<Vec<_>>(); | ||
|
||
|
@@ -1580,11 +1597,9 @@ pub fn unnest_with_options( | |
// We can use the existing functional dependencies: | ||
let deps = input_schema.functional_dependencies().clone(); | ||
let schema = Arc::new(df_schema.with_functional_dependencies(deps)?); | ||
let column = Column::from((unnest_qualifier, unnested_field.as_ref())); | ||
|
||
Ok(LogicalPlan::Unnest(Unnest { | ||
input: Arc::new(input), | ||
column, | ||
columns: qualified_columns, | ||
schema, | ||
options, | ||
})) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
In the expression position, only one argument can be accepted.
This comment was marked as outdated.
Sorry, something went wrong.