From 2ec12ac67595f80a7029d8e6dfa413c2841acbe0 Mon Sep 17 00:00:00 2001 From: dantengsky Date: Mon, 27 May 2024 15:23:29 +0800 Subject: [PATCH] fix: merge into split logic for merge-into stmt with both matched and not-matched branch, right join is used, after got the joined result-set, a column of the result-set is chosen as the "split" column, i.e. for rows of the split column, those are nulls are recognized as no-matched, and others as matched. before this PR, an arbitrary column of target table is picked as the "split" column, which is unsafe, since the "arbitrary column" may have NULL values originally, which may lead to incorrectly treat a matched row as unmatched, and unexpected result (data duplication). --- .../interpreters/interpreter_merge_into.rs | 2 +- .../sql/src/planner/binder/merge_into.rs | 17 ++--------- .../sql/src/planner/optimizer/optimizer.rs | 1 - .../09_0040_merge_into_issue_15643.test | 28 +++++++++++++++++++ 4 files changed, 31 insertions(+), 17 deletions(-) create mode 100644 tests/sqllogictests/suites/base/09_fuse_engine/09_0040_merge_into_issue_15643.test diff --git a/src/query/service/src/interpreters/interpreter_merge_into.rs b/src/query/service/src/interpreters/interpreter_merge_into.rs index b10d0fe80b113..df7273841cf13 100644 --- a/src/query/service/src/interpreters/interpreter_merge_into.rs +++ b/src/query/service/src/interpreters/interpreter_merge_into.rs @@ -227,7 +227,7 @@ impl MergeIntoInterpreter { let insert_only = matches!(merge_type, MergeIntoType::InsertOnly); - let mut row_id_idx = if !insert_only && !target_build_optimization { + let mut row_id_idx = if !insert_only { match meta_data .read() .row_id_index_by_table_index(*target_table_idx) diff --git a/src/query/sql/src/planner/binder/merge_into.rs b/src/query/sql/src/planner/binder/merge_into.rs index 61f3fd82d6598..6df7be89062bc 100644 --- a/src/query/sql/src/planner/binder/merge_into.rs +++ b/src/query/sql/src/planner/binder/merge_into.rs @@ -58,7 +58,6 @@ use crate::IndexType; use crate::ScalarBinder; use crate::ScalarExpr; use crate::Visibility; -use crate::DUMMY_COLUMN_INDEX; #[derive(Clone, Debug, PartialEq, serde::Serialize, serde::Deserialize)] pub enum MergeIntoType { @@ -438,20 +437,8 @@ impl Binder { .await?, ); } - let mut split_idx = DUMMY_COLUMN_INDEX; - // find any target table column index for merge_into_split - for column in self - .metadata - .read() - .columns_by_table_index(table_index) - .iter() - { - if column.index() != row_id_index { - split_idx = column.index(); - break; - } - } - assert!(split_idx != DUMMY_COLUMN_INDEX); + + let split_idx = row_id_index; Ok(MergeInto { catalog: catalog_name.to_string(), diff --git a/src/query/sql/src/planner/optimizer/optimizer.rs b/src/query/sql/src/planner/optimizer/optimizer.rs index 2506f353b1d79..d04d833ee8fe1 100644 --- a/src/query/sql/src/planner/optimizer/optimizer.rs +++ b/src/query/sql/src/planner/optimizer/optimizer.rs @@ -494,7 +494,6 @@ async fn optimize_merge_into(opt_ctx: OptimizerContext, plan: Box) -> == 0 && flag { - new_columns_set.remove(&plan.row_id_index); opt_ctx.table_ctx.set_merge_into_join(MergeIntoJoin { merge_into_join_type: MergeIntoJoinType::Left, is_distributed: false, diff --git a/tests/sqllogictests/suites/base/09_fuse_engine/09_0040_merge_into_issue_15643.test b/tests/sqllogictests/suites/base/09_fuse_engine/09_0040_merge_into_issue_15643.test new file mode 100644 index 0000000000000..9e53be7883358 --- /dev/null +++ b/tests/sqllogictests/suites/base/09_fuse_engine/09_0040_merge_into_issue_15643.test @@ -0,0 +1,28 @@ +statement ok +create or replace database m_test; + +statement ok +use m_test; + +statement ok +create table t(a string, b string, c string, d string, k string); + +statement ok +create table s(a string, b string, c string, d string, k string); + +statement ok +insert into t(k) values('k'); + +statement ok +insert into s(k) values('k'); + + +query II +merge into t using s on t.k = s.k when matched then update * when not matched then insert *; +---- +0 1 + +query TTTTT +select * from t; +---- +NULL NULL NULL NULL k