Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Natural join #3364

Merged
merged 2 commits into from
Dec 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use std::collections::HashMap;
use std::collections::{HashMap, HashSet};
use std::sync::Arc;

use rayexec_bullet::datatype::DataType;
Expand Down Expand Up @@ -550,7 +550,41 @@ impl<'a> FromBinder<'a> {
(Vec::new(), using_cols)
}
ast::JoinCondition::Natural => {
not_implemented!("NATURAL join")
// Get tables refs from the left.
//
// We want to prune these tables out from the right. Tables are
// implicitly in scope on the right for lateral references.
let left_tables: HashSet<_> = bind_context
.iter_tables_in_scope(left_idx)?
.map(|table| table.reference)
.collect();

// Get columns from the left.
let left_cols: HashSet<_> = bind_context
.iter_tables_in_scope(left_idx)?
.flat_map(|table| table.column_names.iter())
.collect();

// Get columns from the right, skipping columns from tables that
// would generate a lateral reference.
let right_cols = bind_context
.iter_tables_in_scope(right_idx)?
.filter(|table| !left_tables.contains(&table.reference))
.flat_map(|table| table.column_names.iter());

let mut common = Vec::new();

// Now collect the columns that are common in both.
//
// Manually iterate instead of using a hash set intersection to keep
// the order of columns consistent.
for right_col in right_cols {
if left_cols.contains(right_col) {
common.push(right_col.clone());
}
}

(Vec::new(), common)
}
ast::JoinCondition::None => (Vec::new(), Vec::new()),
};
Expand Down Expand Up @@ -612,8 +646,16 @@ impl<'a> FromBinder<'a> {
},
};

// Add USING column to _current_ scope.
bind_context.append_using_column(self.current, using_column)?;
// Add USING column to _current_ scope if we don't already have an
// equivalent column in our using set.
let already_using = bind_context
.get_using_columns(self.current)?
.iter()
.any(|c| c.column == using_column.column);

if !already_using {
bind_context.append_using_column(self.current, using_column)?;
}

// Generate additional equality condition.
// TODO: Probably make this a method on the expr binder. Easy to miss the cast.
Expand Down
36 changes: 35 additions & 1 deletion crates/rayexec_parser/src/ast/from.rs
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,9 @@ impl AstParseable for FromNode<Raw> {
}),
}
} else {
// Optional NATURAL prefixing the join type.
let natural = parser.parse_keyword(Keyword::NATURAL);

let kw = match parser.peek() {
Some(tok) => match tok.keyword() {
Some(kw) => kw,
Expand Down Expand Up @@ -166,7 +169,13 @@ impl AstParseable for FromNode<Raw> {
parser.parse_parenthesized_comma_separated(Ident::parse)?,
)
}
_ => JoinCondition::None,
_ => {
if natural {
JoinCondition::Natural
} else {
JoinCondition::None
}
}
};

node = FromNode {
Expand Down Expand Up @@ -688,4 +697,29 @@ mod tests {
};
assert_eq!(expected, node, "left:\n{expected:#?}\nright:\n{node:#?}");
}

/// Parsing `t1 NATURAL INNER JOIN t2` must produce an inner join between the
/// two base tables whose join condition is `JoinCondition::Natural`.
#[test]
fn natural_inner_join_lateral() {
    let node: FromNode<_> = parse_ast("t1 NATURAL INNER JOIN t2").unwrap();

    // Left operand: plain, unaliased reference to `t1`.
    let left_table = FromNode {
        alias: None,
        body: FromNodeBody::BaseTable(FromBaseTable {
            reference: ObjectReference::from_strings(["t1"]),
        }),
    };

    // Right operand: plain, unaliased reference to `t2`.
    let right_table = FromNode {
        alias: None,
        body: FromNodeBody::BaseTable(FromBaseTable {
            reference: ObjectReference::from_strings(["t2"]),
        }),
    };

    // The NATURAL prefix is carried on the join condition, while the join
    // type itself stays INNER.
    let join = FromJoin {
        left: Box::new(left_table),
        right: Box::new(right_table),
        join_type: JoinType::Inner,
        join_condition: JoinCondition::Natural,
    };

    let expected = FromNode {
        alias: None,
        body: FromNodeBody::Join(join),
    };

    assert_eq!(expected, node, "left:\n{expected:#?}\nright:\n{node:#?}");
}
}
95 changes: 95 additions & 0 deletions slt/standard/join/natural_join.slt
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
# Natural join tests

statement ok
CREATE TEMP TABLE t1 (num INT, name TEXT);

statement ok
CREATE TEMP TABLE t2 (num INT, value TEXT);

statement ok
INSERT INTO t1 VALUES (1, 'a'), (2, 'b'), (3, 'c');

statement ok
INSERT INTO t2 VALUES (1, 'xxx'), (3, 'yyy'), (5, 'zzz');

query ITT
SELECT * FROM t1 NATURAL INNER JOIN t2 ORDER BY num;
----
1 a xxx
3 c yyy

# Order by qualified name.
query ITT
SELECT * FROM t1 NATURAL INNER JOIN t2 ORDER BY t1.num;
----
1 a xxx
3 c yyy

query ITT
SELECT * FROM t1 NATURAL INNER JOIN t2 ORDER BY t2.num;
----
1 a xxx
3 c yyy

statement ok
CREATE TEMP TABLE t3 (extra TEXT, num INT);

statement ok
INSERT INTO t3 VALUES ('cat', 3), ('dog', 4), ('goose', 5);

query ITTT
SELECT * FROM t1 NATURAL INNER JOIN t2 NATURAL INNER JOIN t3;
----
3 c yyy cat

query IITT
SELECT t1.num, * FROM t1 NATURAL INNER JOIN t2 NATURAL INNER JOIN t3;
----
3 3 c yyy cat

query I
SELECT t1.num FROM t1 NATURAL INNER JOIN t2 NATURAL INNER JOIN t3;
----
3

query ITTT
SELECT * FROM t1 NATURAL INNER JOIN t2 NATURAL LEFT JOIN t3 ORDER BY 1;
----
1 a xxx NULL
3 c yyy cat

query ITTT
SELECT * FROM t1 NATURAL RIGHT JOIN t2 NATURAL LEFT JOIN t3 ORDER BY value;
----
1 a xxx NULL
3 c yyy cat
5 NULL zzz goose

query ITTT
SELECT * FROM t1 NATURAL RIGHT JOIN t2 NATURAL LEFT JOIN t3 ORDER BY t3.num, t1.num;
----
3 c yyy cat
5 NULL zzz goose
1 a xxx NULL

# USING columns from subqueries
query ITT
SELECT * FROM t1 NATURAL JOIN (SELECT * FROM t2) ORDER BY 1;
----
1 a xxx
3 c yyy

query IT
SELECT * FROM t1 NATURAL JOIN (SELECT 3) s(num);
----
3 c

query IT
SELECT * FROM t1 NATURAL JOIN (SELECT 3) s(num) ORDER BY 1;
----
3 c

query IT
SELECT * FROM t1 NATURAL JOIN (SELECT 3 AS num);
----
3 c
Loading