From b5537e0a0f9839794d2df67b6df1d9c2972daa96 Mon Sep 17 00:00:00 2001 From: hanxuanliang Date: Sat, 18 May 2024 10:21:22 +0800 Subject: [PATCH 1/9] feat(functions): add map_insert function --- src/query/functions/src/scalars/map.rs | 51 +++++++++++++++++++ src/query/functions/tests/it/scalars/map.rs | 29 +++++++++++ .../it/scalars/testdata/function_list.txt | 3 ++ .../tests/it/scalars/testdata/map.txt | 46 +++++++++++++++++ .../query/functions/02_0074_function_map.test | 29 +++++++++++ 5 files changed, 158 insertions(+) diff --git a/src/query/functions/src/scalars/map.rs b/src/query/functions/src/scalars/map.rs index be3c4f5c74fd..998d9b99abc2 100644 --- a/src/query/functions/src/scalars/map.rs +++ b/src/query/functions/src/scalars/map.rs @@ -27,8 +27,10 @@ use databend_common_expression::types::NullType; use databend_common_expression::types::NullableType; use databend_common_expression::types::NumberType; use databend_common_expression::types::SimpleDomain; +use databend_common_expression::types::ValueType; use databend_common_expression::vectorize_1_arg; use databend_common_expression::vectorize_with_builder_2_arg; +use databend_common_expression::vectorize_with_builder_3_arg; use databend_common_expression::FunctionDomain; use databend_common_expression::FunctionRegistry; use databend_common_expression::Value; @@ -244,4 +246,53 @@ pub fn register(registry: &mut FunctionRegistry) { .any(|(k, _)| k == key) }, ); + + registry.register_3_arg_core::, GenericType<1>, MapType, GenericType<1>>, _, _>( + "map_insert", + |_, _, _, _| FunctionDomain::Full, + |_, key, value, ctx| { + let mut b = ArrayType::create_builder(1, ctx.generics); + b.put_item((key.into_scalar().unwrap(), value.into_scalar().unwrap())); + b.commit_row(); + + return Value::Scalar(MapType::build_scalar(b)); + }, + ); + + registry.register_passthrough_nullable_3_arg( + "map_insert", + |_, domian1, _, _| { + FunctionDomain::Domain(match domian1 { + Some((key_domain, val_domain)) => Some(( + key_domain.clone(), + val_domain.clone(), + )), + None => None, + }) + }, + vectorize_with_builder_3_arg::< + MapType, GenericType<1>>, + GenericType<0>, + GenericType<1>, + MapType, GenericType<1>>, + >(|source, key, value, output, ctx| { + // check if key already exists in the map + let duplicate_key = source.iter().any(|(k, _)| k == key); + + let mut new_map = ArrayType::create_builder(source.len() + 1, ctx.generics); + for (k, v) in source.iter() { + if k == key { + new_map.put_item((k.clone(), value.clone())); + continue; + } + new_map.put_item((k.clone(), v.clone())); + } + if !duplicate_key { + new_map.put_item((key.clone(), value.clone())); + } + new_map.commit_row(); + + output.append_column(&new_map.build()); + }), + ); } diff --git a/src/query/functions/tests/it/scalars/map.rs b/src/query/functions/tests/it/scalars/map.rs index 909bc3ddb8fe..40d021ca3cb3 100644 --- a/src/query/functions/tests/it/scalars/map.rs +++ b/src/query/functions/tests/it/scalars/map.rs @@ -32,6 +32,7 @@ fn test_map() { test_map_size(file); test_map_cat(file); test_map_contains_key(file); + test_map_insert(file) } fn test_map_cat(file: &mut impl Write) { @@ -278,3 +279,31 @@ fn test_map_size(file: &mut impl Write) { &columns, ); } + +fn test_map_insert(file: &mut impl Write) { + run_ast(file, "map_insert({}, 'k1', 'v1')", &[]); + run_ast(file, "map_insert({'k1': 'v1'}, 'k2', 'v2')", &[]); + + let columns = [ + ("a_col", StringType::from_data(vec!["a", "b", "c"])), + ("b_col", StringType::from_data(vec!["d", "e", "f"])), + ("c_col", StringType::from_data(vec!["x", "y", "z"])), + ( + "d_col", + StringType::from_data_with_validity(vec!["v1", "v2", "v3"], vec![true, true, true]), + ), + ( + "e_col", + StringType::from_data_with_validity(vec!["v4", "v5", ""], vec![true, true, false]), + ), + ( + "f_col", + StringType::from_data_with_validity(vec!["v6", "", "v7"], vec![true, false, true]), + ), + ]; + run_ast( + file, + "map_insert(map([a_col, b_col, c_col], [d_col, e_col, f_col]), 'k1', 'v10')", + &columns, + ); +} diff --git a/src/query/functions/tests/it/scalars/testdata/function_list.txt b/src/query/functions/tests/it/scalars/testdata/function_list.txt index fd840cdff97f..9829663dde25 100644 --- a/src/query/functions/tests/it/scalars/testdata/function_list.txt +++ b/src/query/functions/tests/it/scalars/testdata/function_list.txt @@ -2449,6 +2449,9 @@ Functions overloads: 0 map_size(Map(Nothing)) :: UInt8 1 map_size(Map(T0, T1)) :: UInt64 2 map_size(Map(T0, T1) NULL) :: UInt64 NULL +0 map_insert(Map(Nothing), T0, T1) :: Map(Nothing, T1) +1 map_insert(Map(T0, T1), T0, T2) :: Map(T0, T2) +2 map_insert(Map(T0, T1) NULL, T0 NULL, T2 NULL) :: Map(T0, T2) NULL 0 map_values(Map(Nothing)) :: Array(Nothing) 1 map_values(Map(T0, T1)) :: Array(T1) 2 map_values(Map(T0, T1) NULL) :: Array(T1) NULL diff --git a/src/query/functions/tests/it/scalars/testdata/map.txt b/src/query/functions/tests/it/scalars/testdata/map.txt index 7149c7a324df..a13862f6f63d 100644 --- a/src/query/functions/tests/it/scalars/testdata/map.txt +++ b/src/query/functions/tests/it/scalars/testdata/map.txt @@ -617,3 +617,49 @@ evaluation (internal): +--------+-----------------------------------------------------------------------------------------------------------------+ +ast : map_insert({}, 'k1', 'v1') +raw expr : map_insert(map(array(), array()), 'k1', 'v1') +checked expr : map_insert(map(array<>(), array<>()), "k1", "v1") +optimized expr : {"k1":"v1"} +output type : Map(String, String) +output domain : {[{"k1"..="k1"}], [{"v1"..="v1"}]} +output : {'k1':'v1'} + + +ast : map_insert({'k1': 'v1'}, 'k2', 'v2') +raw expr : map_insert(map(array('k1'), array('v1')), 'k2', 'v2') +checked expr : map_insert(map(array("k1"), array("v1")), "k2", "v2") +optimized expr : {"k1":"v1", "k2":"v2"} +output type : Map(String, String) +output domain : {[{"k1"..="k2"}], [{"v1"..="v2"}]} +output : {'k1':'v1', 'k2':'v2'} + + +ast : map_insert(map([a_col, b_col, c_col], [d_col, e_col, f_col]), 'k1', 'v10') +raw expr : map_insert(map(array(a_col::String, b_col::String, c_col::String), array(d_col::String NULL, e_col::String NULL, f_col::String NULL)), 'k1', 'v10') +checked expr : map_insert(map(array(a_col, b_col, c_col), array(d_col, e_col, f_col)), "k1", CAST("v10" AS String NULL)) +optimized expr : map_insert(map(array(a_col, b_col, c_col), array(d_col, e_col, f_col)), "k1", "v10") +evaluation: ++--------+-------------+-------------+-------------+---------------+----------------------+----------------------+--------------------------------------------+ +| | a_col | b_col | c_col | d_col | e_col | f_col | Output | ++--------+-------------+-------------+-------------+---------------+----------------------+----------------------+--------------------------------------------+ +| Type | String | String | String | String NULL | String NULL | String NULL | Map(String, String NULL) | +| Domain | {"a"..="c"} | {"d"..="f"} | {"x"..="z"} | {"v1"..="v3"} | {""..="v5"} ∪ {NULL} | {""..="v7"} ∪ {NULL} | Unknown | +| Row 0 | 'a' | 'd' | 'x' | 'v1' | 'v4' | 'v6' | {'a':'v1', 'd':'v4', 'x':'v6', 'k1':'v10'} | +| Row 1 | 'b' | 'e' | 'y' | 'v2' | 'v5' | NULL | {'b':'v2', 'e':'v5', 'y':NULL, 'k1':'v10'} | +| Row 2 | 'c' | 'f' | 'z' | 'v3' | NULL | 'v7' | {'c':'v3', 'f':NULL, 'z':'v7', 'k1':'v10'} | ++--------+-------------+-------------+-------------+---------------+----------------------+----------------------+--------------------------------------------+ +evaluation (internal): ++--------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| a_col | StringColumn { data: 0x616263, offsets: [0, 1, 2, 3] } | +| b_col | StringColumn { data: 0x646566, offsets: [0, 1, 2, 3] } | +| c_col | StringColumn { data: 0x78797a, offsets: [0, 1, 2, 3] } | +| d_col | NullableColumn { column: StringColumn { data: 0x763176327633, offsets: [0, 2, 4, 6] }, validity: [0b_____111] } | +| e_col | NullableColumn { column: StringColumn { data: 0x76347635, offsets: [0, 2, 4, 4] }, validity: [0b_____011] } | +| f_col | NullableColumn { column: StringColumn { data: 0x76367637, offsets: [0, 2, 2, 4] }, validity: [0b_____101] } | +| Output | ArrayColumn { values: Tuple([StringColumn { data: 0x6164786b316265796b3163667a6b31, offsets: [0, 1, 2, 3, 5, 6, 7, 8, 10, 11, 12, 13, 15] }, NullableColumn { column: StringColumn { data: 0x7631763476367631307632763576313076337637763130, offsets: [0, 2, 4, 6, 9, 11, 13, 13, 16, 18, 18, 20, 23] }, validity: [0b10111111, 0b____1101] }]), offsets: [0, 4, 8, 12] } | ++--------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + + diff --git a/tests/sqllogictests/suites/query/functions/02_0074_function_map.test b/tests/sqllogictests/suites/query/functions/02_0074_function_map.test index c629b74e135e..8276e04c9a77 100644 --- a/tests/sqllogictests/suites/query/functions/02_0074_function_map.test +++ b/tests/sqllogictests/suites/query/functions/02_0074_function_map.test @@ -167,5 +167,34 @@ SELECT map_contains_key({'k1': 'v1', 'k2': NULL}, 'k2') ---- 1 +query +SELECT map_insert({'k1': 'v1', 'k2': 'v2'}, 'k3', NULL) +---- +{'k1':'v1','k2':'v2','k3':NULL} + +query +SELECT map_insert({'k1': 'v1', 'k2': 'v2'}, 'k1', 'new_v1') +---- +{'k1':'new_v1','k2':'v2'} + +query +SELECT map_insert({}, 'k1', 'v1') +---- +{'k1':'v1'} + +statement ok +CREATE TABLE map_insert_test(col_str Map(String, String Null) Not Null, col_int Map(String, Int Null) Null); + +statement ok +INSERT INTO map_insert_test VALUES ({'k1':'v1','k2':'v2','k3':null},{'a':10,'b':20}), ({'k5':'v5','k6':'v6'}, {'d':40,'e':null,'f':50}), ({}, NULL); + +query +SELECT map_insert(col_int, 'a', 30) +FROM map_insert_test; +---- +{'a':30,'b':20} +{'d':40,'e':NULL,'f':50,'a':30} +NULL + statement ok DROP DATABASE map_func_test From d527a7bad7d1cde413e6bcd49dd96e68683239e0 Mon Sep 17 00:00:00 2001 From: hanxuanliang Date: Sat, 18 May 2024 10:50:02 +0800 Subject: [PATCH 2/9] feat(functions): check ci --- src/query/functions/src/scalars/map.rs | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/query/functions/src/scalars/map.rs b/src/query/functions/src/scalars/map.rs index 998d9b99abc2..bd75b6d4c5de 100644 --- a/src/query/functions/src/scalars/map.rs +++ b/src/query/functions/src/scalars/map.rs @@ -263,10 +263,7 @@ pub fn register(registry: &mut FunctionRegistry) { "map_insert", |_, domian1, _, _| { FunctionDomain::Domain(match domian1 { - Some((key_domain, val_domain)) => Some(( - key_domain.clone(), - val_domain.clone(), - )), + Some((key_domain, val_domain)) => Some((key_domain.clone(), val_domain.clone())), None => None, }) }, From fa0c45d44b309c0e55f5d7e2e9f9317338cab186 Mon Sep 17 00:00:00 2001 From: hanxuanliang Date: Sun, 19 May 2024 10:38:36 +0800 Subject: [PATCH 3/9] feat(functions): domain fix --- src/query/functions/src/scalars/map.rs | 29 ++++++++++++++++++++------ 1 file changed, 23 insertions(+), 6 deletions(-) diff --git a/src/query/functions/src/scalars/map.rs b/src/query/functions/src/scalars/map.rs index bd75b6d4c5de..69d9c6f98c7c 100644 --- a/src/query/functions/src/scalars/map.rs +++ b/src/query/functions/src/scalars/map.rs @@ -249,7 +249,12 @@ pub fn register(registry: &mut FunctionRegistry) { registry.register_3_arg_core::, GenericType<1>, MapType, GenericType<1>>, _, _>( "map_insert", - |_, _, _, _| FunctionDomain::Full, + |_, _, insert_key_domain, insert_value_domain| { + FunctionDomain::Domain(Some(( + insert_key_domain.clone(), + insert_value_domain.clone(), + ))) + }, |_, key, value, ctx| { let mut b = ArrayType::create_builder(1, ctx.generics); b.put_item((key.into_scalar().unwrap(), value.into_scalar().unwrap())); @@ -261,10 +266,13 @@ pub fn register(registry: &mut FunctionRegistry) { registry.register_passthrough_nullable_3_arg( "map_insert", - |_, domian1, _, _| { - FunctionDomain::Domain(match domian1 { - Some((key_domain, val_domain)) => Some((key_domain.clone(), val_domain.clone())), - None => None, + |_, domain1, key_domain, value_domain| { + FunctionDomain::Domain(match (domain1, key_domain, value_domain) { + (Some((key_domain, val_domain)), insert_key_domain, insert_value_domain) => Some(( + key_domain.merge(insert_key_domain), + val_domain.merge(insert_value_domain), + )), + (None, _, _) => None, }) }, vectorize_with_builder_3_arg::< @@ -273,9 +281,18 @@ pub fn register(registry: &mut FunctionRegistry) { GenericType<1>, MapType, GenericType<1>>, >(|source, key, value, output, ctx| { + // insert operation only works on specific key type: boolean, string, numeric, decimal, date, datetime + let key_type = &ctx.generics[0]; + if !key_type.is_boolean() + && !key_type.is_string() + && !key_type.is_numeric() + && !key_type.is_decimal() + && !key_type.is_date_or_date_time() { + ctx.set_error(output.len(), format!("map keys can not be {}", key_type)); + } + // check if key already exists in the map let duplicate_key = source.iter().any(|(k, _)| k == key); - let mut new_map = ArrayType::create_builder(source.len() + 1, ctx.generics); for (k, v) in source.iter() { if k == key { From ea0c1d4b46d03c16dd5f429c22a8b832fed050a5 Mon Sep 17 00:00:00 2001 From: hanxuanliang Date: Sun, 19 May 2024 15:32:24 +0800 Subject: [PATCH 4/9] feat(functions): add new params format in map_insert --- src/query/functions/src/scalars/map.rs | 90 +++++++++++++++---- src/query/functions/tests/it/scalars/map.rs | 20 +++++ .../tests/it/scalars/testdata/map.txt | 74 +++++++++++++++ .../query/functions/02_0074_function_map.test | 18 +++- 4 files changed, 181 insertions(+), 21 deletions(-) diff --git a/src/query/functions/src/scalars/map.rs b/src/query/functions/src/scalars/map.rs index 69d9c6f98c7c..347ab5f3eff1 100644 --- a/src/query/functions/src/scalars/map.rs +++ b/src/query/functions/src/scalars/map.rs @@ -15,6 +15,9 @@ use std::collections::HashSet; use std::hash::Hash; +use databend_common_expression::types::array::ArrayColumn; +use databend_common_expression::types::map::KvColumn; +use databend_common_expression::types::map::KvPair; use databend_common_expression::types::nullable::NullableDomain; use databend_common_expression::types::ArgType; use databend_common_expression::types::ArrayType; @@ -31,8 +34,11 @@ use databend_common_expression::types::ValueType; use databend_common_expression::vectorize_1_arg; use databend_common_expression::vectorize_with_builder_2_arg; use databend_common_expression::vectorize_with_builder_3_arg; +use databend_common_expression::vectorize_with_builder_4_arg; +use databend_common_expression::EvalContext; use databend_common_expression::FunctionDomain; use databend_common_expression::FunctionRegistry; +use databend_common_expression::ScalarRef; use databend_common_expression::Value; use databend_common_hashtable::StackHashSet; use siphasher::sip128::Hasher128; @@ -284,29 +290,79 @@ pub fn register(registry: &mut FunctionRegistry) { // insert operation only works on specific key type: boolean, string, numeric, decimal, date, datetime let key_type = &ctx.generics[0]; if !key_type.is_boolean() - && !key_type.is_string() - && !key_type.is_numeric() - && !key_type.is_decimal() - && !key_type.is_date_or_date_time() { + && !key_type.is_string() + && !key_type.is_numeric() + && !key_type.is_decimal() + && !key_type.is_date_or_date_time() { ctx.set_error(output.len(), format!("map keys can not be {}", key_type)); } - // check if key already exists in the map - let duplicate_key = source.iter().any(|(k, _)| k == key); - let mut new_map = ArrayType::create_builder(source.len() + 1, ctx.generics); - for (k, v) in source.iter() { - if k == key { - new_map.put_item((k.clone(), value.clone())); - continue; - } - new_map.put_item((k.clone(), v.clone())); + // default behavior is to insert new key-value pair, and if the key already exists, update the value. + output.append_column(&build_new_map(&source, key, value, ctx)); + }), + ); + + // grammar: map_insert(map, insert_key, insert_value, allow_update) + registry.register_passthrough_nullable_4_arg( + "map_insert", + |_, domain1, key_domain, value_domain, _| + FunctionDomain::Domain(match (domain1, key_domain, value_domain) { + (Some((key_domain, val_domain)), insert_key_domain, insert_value_domain) => Some(( + key_domain.merge(insert_key_domain), + val_domain.merge(insert_value_domain), + )), + (None, _, _) => None, + }), + vectorize_with_builder_4_arg::< + MapType, GenericType<1>>, + GenericType<0>, + GenericType<1>, + BooleanType, + MapType, GenericType<1>>, + >(|source, key: databend_common_expression::ScalarRef, value, allow_update, output, ctx| { + let key_type = &ctx.generics[0]; + if !key_type.is_boolean() + && !key_type.is_string() + && !key_type.is_numeric() + && !key_type.is_decimal() + && !key_type.is_date_or_date_time() { + ctx.set_error(output.len(), format!("map keys can not be {}", key_type)); } - if !duplicate_key { - new_map.put_item((key.clone(), value.clone())); + + let duplicate_key = source.iter().any(|(k, _)| k == key); + // if duplicate_key is true and allow_update is false, return the original map + if duplicate_key && !allow_update { + let mut new_builder = ArrayType::create_builder(source.len(), ctx.generics); + source.iter().for_each(|(k, v)| new_builder.put_item((k.clone(), v.clone()))); + new_builder.commit_row(); + output.append_column(&new_builder.build()); + return; } - new_map.commit_row(); - output.append_column(&new_map.build()); + output.append_column(&build_new_map(&source, key, value, ctx)); }), ); + + fn build_new_map( + source: &KvColumn, GenericType<1>>, + insert_key: ScalarRef, + insert_value: ScalarRef, + ctx: &EvalContext + ) -> ArrayColumn, GenericType<1>>> { + let duplicate_key = source.iter().any(|(k, _)| k == insert_key); + let mut new_map = ArrayType::create_builder(source.len() + 1, ctx.generics); + for (k, v) in source.iter() { + if k == insert_key { + new_map.put_item((k.clone(), insert_value.clone())); + continue; + } + new_map.put_item((k.clone(), v.clone())); + } + if !duplicate_key { + new_map.put_item((insert_key.clone(), insert_value.clone())); + } + new_map.commit_row(); + + new_map.build() + } } diff --git a/src/query/functions/tests/it/scalars/map.rs b/src/query/functions/tests/it/scalars/map.rs index 40d021ca3cb3..04306b531edc 100644 --- a/src/query/functions/tests/it/scalars/map.rs +++ b/src/query/functions/tests/it/scalars/map.rs @@ -283,6 +283,16 @@ fn test_map_size(file: &mut impl Write) { fn test_map_insert(file: &mut impl Write) { run_ast(file, "map_insert({}, 'k1', 'v1')", &[]); run_ast(file, "map_insert({'k1': 'v1'}, 'k2', 'v2')", &[]); + run_ast( + file, + "map_insert({'k1': 'v1', 'k2': 'v2'}, 'k1', 'v10', false)", + &[], + ); + run_ast( + file, + "map_insert({'k1': 'v1', 'k2': 'v2'}, 'k1', 'v10', true)", + &[], + ); let columns = [ ("a_col", StringType::from_data(vec!["a", "b", "c"])), @@ -306,4 +316,14 @@ fn test_map_insert(file: &mut impl Write) { "map_insert(map([a_col, b_col, c_col], [d_col, e_col, f_col]), 'k1', 'v10')", &columns, ); + run_ast( + file, + "map_insert(map([a_col, b_col, c_col], [d_col, e_col, f_col]), 'a', 'v10', true)", + &columns, + ); + run_ast( + file, + "map_insert(map([a_col, b_col, c_col], [d_col, e_col, f_col]), 'a', 'v10', false)", + &columns, + ); } diff --git a/src/query/functions/tests/it/scalars/testdata/map.txt b/src/query/functions/tests/it/scalars/testdata/map.txt index a13862f6f63d..b96834bba089 100644 --- a/src/query/functions/tests/it/scalars/testdata/map.txt +++ b/src/query/functions/tests/it/scalars/testdata/map.txt @@ -635,6 +635,24 @@ output domain : {[{"k1"..="k2"}], [{"v1"..="v2"}]} output : {'k1':'v1', 'k2':'v2'} +ast : map_insert({'k1': 'v1', 'k2': 'v2'}, 'k1', 'v10', false) +raw expr : map_insert(map(array('k1', 'k2'), array('v1', 'v2')), 'k1', 'v10', false) +checked expr : map_insert(map(array("k1", "k2"), array("v1", "v2")), "k1", "v10", false) +optimized expr : {"k1":"v1", "k2":"v2"} +output type : Map(String, String) +output domain : {[{"k1"..="k2"}], [{"v1"..="v2"}]} +output : {'k1':'v1', 'k2':'v2'} + + +ast : map_insert({'k1': 'v1', 'k2': 'v2'}, 'k1', 'v10', true) +raw expr : map_insert(map(array('k1', 'k2'), array('v1', 'v2')), 'k1', 'v10', true) +checked expr : map_insert(map(array("k1", "k2"), array("v1", "v2")), "k1", "v10", true) +optimized expr : {"k1":"v10", "k2":"v2"} +output type : Map(String, String) +output domain : {[{"k1"..="k2"}], [{"v10"..="v2"}]} +output : {'k1':'v10', 'k2':'v2'} + + ast : map_insert(map([a_col, b_col, c_col], [d_col, e_col, f_col]), 'k1', 'v10') raw expr : map_insert(map(array(a_col::String, b_col::String, c_col::String), array(d_col::String NULL, e_col::String NULL, f_col::String NULL)), 'k1', 'v10') checked expr : map_insert(map(array(a_col, b_col, c_col), array(d_col, e_col, f_col)), "k1", CAST("v10" AS String NULL)) @@ -663,3 +681,59 @@ evaluation (internal): +--------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +ast : map_insert(map([a_col, b_col, c_col], [d_col, e_col, f_col]), 'a', 'v10', true) +raw expr : map_insert(map(array(a_col::String, b_col::String, c_col::String), array(d_col::String NULL, e_col::String NULL, f_col::String NULL)), 'a', 'v10', true) +checked expr : map_insert(map(array(a_col, b_col, c_col), array(d_col, e_col, f_col)), "a", CAST("v10" AS String NULL), true) +optimized expr : map_insert(map(array(a_col, b_col, c_col), array(d_col, e_col, f_col)), "a", "v10", true) +evaluation: ++--------+-------------+-------------+-------------+---------------+----------------------+----------------------+-------------------------------------------+ +| | a_col | b_col | c_col | d_col | e_col | f_col | Output | ++--------+-------------+-------------+-------------+---------------+----------------------+----------------------+-------------------------------------------+ +| Type | String | String | String | String NULL | String NULL | String NULL | Map(String, String NULL) | +| Domain | {"a"..="c"} | {"d"..="f"} | {"x"..="z"} | {"v1"..="v3"} | {""..="v5"} ∪ {NULL} | {""..="v7"} ∪ {NULL} | Unknown | +| Row 0 | 'a' | 'd' | 'x' | 'v1' | 'v4' | 'v6' | {'a':'v10', 'd':'v4', 'x':'v6'} | +| Row 1 | 'b' | 'e' | 'y' | 'v2' | 'v5' | NULL | {'b':'v2', 'e':'v5', 'y':NULL, 'a':'v10'} | +| Row 2 | 'c' | 'f' | 'z' | 'v3' | NULL | 'v7' | {'c':'v3', 'f':NULL, 'z':'v7', 'a':'v10'} | ++--------+-------------+-------------+-------------+---------------+----------------------+----------------------+-------------------------------------------+ +evaluation (internal): ++--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| a_col | StringColumn { data: 0x616263, offsets: [0, 1, 2, 3] } | +| b_col | StringColumn { data: 0x646566, offsets: [0, 1, 2, 3] } | +| c_col | StringColumn { data: 0x78797a, offsets: [0, 1, 2, 3] } | +| d_col | NullableColumn { column: StringColumn { data: 0x763176327633, offsets: [0, 2, 4, 6] }, validity: [0b_____111] } | +| e_col | NullableColumn { column: StringColumn { data: 0x76347635, offsets: [0, 2, 4, 4] }, validity: [0b_____011] } | +| f_col | NullableColumn { column: StringColumn { data: 0x76367637, offsets: [0, 2, 2, 4] }, validity: [0b_____101] } | +| Output | ArrayColumn { values: Tuple([StringColumn { data: 0x6164786265796163667a61, offsets: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] }, NullableColumn { column: StringColumn { data: 0x763130763476367632763576313076337637763130, offsets: [0, 3, 5, 7, 9, 11, 11, 14, 16, 16, 18, 21] }, validity: [0b11011111, 0b_____110] }]), offsets: [0, 3, 7, 11] } | ++--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + + +ast : map_insert(map([a_col, b_col, c_col], [d_col, e_col, f_col]), 'a', 'v10', false) +raw expr : map_insert(map(array(a_col::String, b_col::String, c_col::String), array(d_col::String NULL, e_col::String NULL, f_col::String NULL)), 'a', 'v10', false) +checked expr : map_insert(map(array(a_col, b_col, c_col), array(d_col, e_col, f_col)), "a", CAST("v10" AS String NULL), false) +optimized expr : map_insert(map(array(a_col, b_col, c_col), array(d_col, e_col, f_col)), "a", "v10", false) +evaluation: ++--------+-------------+-------------+-------------+---------------+----------------------+----------------------+-------------------------------------------+ +| | a_col | b_col | c_col | d_col | e_col | f_col | Output | ++--------+-------------+-------------+-------------+---------------+----------------------+----------------------+-------------------------------------------+ +| Type | String | String | String | String NULL | String NULL | String NULL | Map(String, String NULL) | +| Domain | {"a"..="c"} | {"d"..="f"} | {"x"..="z"} | {"v1"..="v3"} | {""..="v5"} ∪ {NULL} | {""..="v7"} ∪ {NULL} | Unknown | +| Row 0 | 'a' | 'd' | 'x' | 'v1' | 'v4' | 'v6' | {'a':'v1', 'd':'v4', 'x':'v6'} | +| Row 1 | 'b' | 'e' | 'y' | 'v2' | 'v5' | NULL | {'b':'v2', 'e':'v5', 'y':NULL, 'a':'v10'} | +| Row 2 | 'c' | 'f' | 'z' | 'v3' | NULL | 'v7' | {'c':'v3', 'f':NULL, 'z':'v7', 'a':'v10'} | ++--------+-------------+-------------+-------------+---------------+----------------------+----------------------+-------------------------------------------+ +evaluation (internal): ++--------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| a_col | StringColumn { data: 0x616263, offsets: [0, 1, 2, 3] } | +| b_col | StringColumn { data: 0x646566, offsets: [0, 1, 2, 3] } | +| c_col | StringColumn { data: 0x78797a, offsets: [0, 1, 2, 3] } | +| d_col | NullableColumn { column: StringColumn { data: 0x763176327633, offsets: [0, 2, 4, 6] }, validity: [0b_____111] } | +| e_col | NullableColumn { column: StringColumn { data: 0x76347635, offsets: [0, 2, 4, 4] }, validity: [0b_____011] } | +| f_col | NullableColumn { column: StringColumn { data: 0x76367637, offsets: [0, 2, 2, 4] }, validity: [0b_____101] } | +| Output | ArrayColumn { values: Tuple([StringColumn { data: 0x6164786265796163667a61, offsets: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] }, NullableColumn { column: StringColumn { data: 0x7631763476367632763576313076337637763130, offsets: [0, 2, 4, 6, 8, 10, 10, 13, 15, 15, 17, 20] }, validity: [0b11011111, 0b_____110] }]), offsets: [0, 3, 7, 11] } | ++--------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + + diff --git a/tests/sqllogictests/suites/query/functions/02_0074_function_map.test b/tests/sqllogictests/suites/query/functions/02_0074_function_map.test index 8276e04c9a77..ea2b723f3abe 100644 --- a/tests/sqllogictests/suites/query/functions/02_0074_function_map.test +++ b/tests/sqllogictests/suites/query/functions/02_0074_function_map.test @@ -182,6 +182,16 @@ SELECT map_insert({}, 'k1', 'v1') ---- {'k1':'v1'} +query +SELECT map_insert({'k1': 'v1', 'k2': 'v2'}, 'k2', 'v3', true) +---- +{'k1':'v1','k2':'v3'} + +query +SELECT map_insert({'k1': 'v1', 'k2': 'v2'}, 'k2', 'v3', false) +---- +{'k1':'v1','k2':'v2'} + statement ok CREATE TABLE map_insert_test(col_str Map(String, String Null) Not Null, col_int Map(String, Int Null) Null); @@ -189,12 +199,12 @@ statement ok INSERT INTO map_insert_test VALUES ({'k1':'v1','k2':'v2','k3':null},{'a':10,'b':20}), ({'k5':'v5','k6':'v6'}, {'d':40,'e':null,'f':50}), ({}, NULL); query -SELECT map_insert(col_int, 'a', 30) +SELECT map_insert(col_str, 'k1', 'k100', true) as update_str, map_insert(col_str, 'k1', 'k100', false) as origin_str FROM map_insert_test; ---- -{'a':30,'b':20} -{'d':40,'e':NULL,'f':50,'a':30} -NULL +{'k1':'k100','k2':'v2','k3':NULL} {'k1':'v1','k2':'v2','k3':NULL} +{'k5':'v5','k6':'v6','k1':'k100'} {'k5':'v5','k6':'v6','k1':'k100'} +{'k1':'k100'} {'k1':'k100'} statement ok DROP DATABASE map_func_test From 35c6a6e00edc473ea12547137f74382dd447fd70 Mon Sep 17 00:00:00 2001 From: hanxuanliang Date: Sun, 19 May 2024 20:05:56 +0800 Subject: [PATCH 5/9] feat(functions): cargo fmt --- src/query/functions/src/scalars/map.rs | 36 ++++++++++++++------------ 1 file changed, 20 insertions(+), 16 deletions(-) diff --git a/src/query/functions/src/scalars/map.rs b/src/query/functions/src/scalars/map.rs index 347ab5f3eff1..687510891636 100644 --- a/src/query/functions/src/scalars/map.rs +++ b/src/query/functions/src/scalars/map.rs @@ -265,7 +265,6 @@ pub fn register(registry: &mut FunctionRegistry) { let mut b = ArrayType::create_builder(1, ctx.generics); b.put_item((key.into_scalar().unwrap(), value.into_scalar().unwrap())); b.commit_row(); - return Value::Scalar(MapType::build_scalar(b)); }, ); @@ -293,7 +292,8 @@ pub fn register(registry: &mut FunctionRegistry) { && !key_type.is_string() && !key_type.is_numeric() && !key_type.is_decimal() - && !key_type.is_date_or_date_time() { + && !key_type.is_date_or_date_time() + { ctx.set_error(output.len(), format!("map keys can not be {}", key_type)); } @@ -302,30 +302,32 @@ pub fn register(registry: &mut FunctionRegistry) { }), ); - // grammar: map_insert(map, insert_key, insert_value, allow_update) - registry.register_passthrough_nullable_4_arg( + // grammar: map_insert(map, insert_key, insert_value, allow_update) + registry.register_passthrough_nullable_4_arg( "map_insert", - |_, domain1, key_domain, value_domain, _| - FunctionDomain::Domain(match (domain1, key_domain, value_domain) { - (Some((key_domain, val_domain)), insert_key_domain, insert_value_domain) => Some(( - key_domain.merge(insert_key_domain), - val_domain.merge(insert_value_domain), - )), - (None, _, _) => None, - }), + |_, domain1, key_domain, value_domain, _| { + FunctionDomain::Domain(match (domain1, key_domain, value_domain) { + (Some((key_domain, val_domain)), insert_key_domain, insert_value_domain) => Some(( + key_domain.merge(insert_key_domain), + val_domain.merge(insert_value_domain), + )), + (None, _, _) => None, + }) + }, vectorize_with_builder_4_arg::< MapType, GenericType<1>>, GenericType<0>, GenericType<1>, BooleanType, MapType, GenericType<1>>, - >(|source, key: databend_common_expression::ScalarRef, value, allow_update, output, ctx| { + >(|source, key, value, allow_update, output, ctx| { let key_type = &ctx.generics[0]; if !key_type.is_boolean() && !key_type.is_string() && !key_type.is_numeric() && !key_type.is_decimal() - && !key_type.is_date_or_date_time() { + && !key_type.is_date_or_date_time() + { ctx.set_error(output.len(), format!("map keys can not be {}", key_type)); } @@ -333,7 +335,9 @@ pub fn register(registry: &mut FunctionRegistry) { // if duplicate_key is true and allow_update is false, return the original map if duplicate_key && !allow_update { let mut new_builder = ArrayType::create_builder(source.len(), ctx.generics); - source.iter().for_each(|(k, v)| new_builder.put_item((k.clone(), v.clone()))); + source + .iter() + .for_each(|(k, v)| new_builder.put_item((k.clone(), v.clone()))); new_builder.commit_row(); output.append_column(&new_builder.build()); return; @@ -347,7 +351,7 @@ pub fn register(registry: &mut FunctionRegistry) { source: &KvColumn, GenericType<1>>, insert_key: ScalarRef, insert_value: ScalarRef, - ctx: &EvalContext + ctx: &EvalContext, ) -> ArrayColumn, GenericType<1>>> { let duplicate_key = source.iter().any(|(k, _)| k == insert_key); let mut new_map = ArrayType::create_builder(source.len() + 1, ctx.generics); From b287126b0983c7b449c6d1a14b193073bb0a5cbd Mon Sep 17 00:00:00 2001 From: hanxuanliang Date: Sat, 25 May 2024 14:52:28 +0800 Subject: [PATCH 6/9] feat(functions): fix NULL mapType in insert --- src/query/functions/src/scalars/map.rs | 65 +++++++++++++------ .../tests/it/scalars/testdata/map.txt | 6 +- .../query/functions/02_0074_function_map.test | 8 +++ 3 files changed, 55 insertions(+), 24 deletions(-) diff --git a/src/query/functions/src/scalars/map.rs b/src/query/functions/src/scalars/map.rs index 687510891636..1f5621042ec7 100644 --- a/src/query/functions/src/scalars/map.rs +++ b/src/query/functions/src/scalars/map.rs @@ -262,6 +262,16 @@ pub fn register(registry: &mut FunctionRegistry) { ))) }, |_, key, value, ctx| { + let key_type = &ctx.generics[0]; + if !key_type.is_boolean() + && !key_type.is_string() + && !key_type.is_numeric() + && !key_type.is_decimal() + && !key_type.is_date_or_date_time() + { + ctx.set_error(0, format!("map keys can not be {}", key_type)); + } + let mut b = ArrayType::create_builder(1, ctx.generics); b.put_item((key.into_scalar().unwrap(), value.into_scalar().unwrap())); b.commit_row(); @@ -269,6 +279,40 @@ pub fn register(registry: &mut FunctionRegistry) { }, ); + registry.register_3_arg_core::, GenericType<1>>>, GenericType<0>, GenericType<1>, MapType, GenericType<1>>, _, _>( + "map_insert", + |_, source_domain, insert_key_domain, insert_value_domain| { + FunctionDomain::Domain(match source_domain.has_null { + true => Some(( + insert_key_domain.clone(), + insert_value_domain.clone(), + )), + false => source_domain.value.as_ref().map(|v| { + let a = v.clone().unwrap(); + (a.0.clone(), a.1.clone()) + }), + }) + }, + vectorize_with_builder_3_arg::< + NullableType, GenericType<1>>>, + GenericType<0>, + GenericType<1>, + MapType, GenericType<1>>, + >(|source, key, value, output, ctx| { + match source { + Some(source) => { + output.append_column(&build_new_map(&source, key, value, ctx)); + }, + None => { + let mut b = ArrayType::create_builder(1, ctx.generics); + b.put_item((key.clone(), value.clone())); + b.commit_row(); + output.append_column(&b.build()); + }, + }; + }), + ); + registry.register_passthrough_nullable_3_arg( "map_insert", |_, domain1, key_domain, value_domain| { @@ -286,17 +330,6 @@ pub fn register(registry: &mut FunctionRegistry) { GenericType<1>, MapType, GenericType<1>>, >(|source, key, value, output, ctx| { - // insert operation only works on specific key type: boolean, string, numeric, decimal, date, datetime - let key_type = &ctx.generics[0]; - if !key_type.is_boolean() - && !key_type.is_string() - && !key_type.is_numeric() - && !key_type.is_decimal() - && !key_type.is_date_or_date_time() - { - ctx.set_error(output.len(), format!("map keys can not be {}", key_type)); - } - // default behavior is to insert new key-value pair, and if the key already exists, update the value. output.append_column(&build_new_map(&source, key, value, ctx)); }), @@ -321,16 +354,6 @@ pub fn register(registry: &mut FunctionRegistry) { BooleanType, MapType, GenericType<1>>, >(|source, key, value, allow_update, output, ctx| { - let key_type = &ctx.generics[0]; - if !key_type.is_boolean() - && !key_type.is_string() - && !key_type.is_numeric() - && !key_type.is_decimal() - && !key_type.is_date_or_date_time() - { - ctx.set_error(output.len(), format!("map keys can not be {}", key_type)); - } - let duplicate_key = source.iter().any(|(k, _)| k == key); // if duplicate_key is true and allow_update is false, return the original map if duplicate_key && !allow_update { diff --git a/src/query/functions/tests/it/scalars/testdata/map.txt b/src/query/functions/tests/it/scalars/testdata/map.txt index b96834bba089..08985344345f 100644 --- a/src/query/functions/tests/it/scalars/testdata/map.txt +++ b/src/query/functions/tests/it/scalars/testdata/map.txt @@ -628,7 +628,7 @@ output : {'k1':'v1'} ast : map_insert({'k1': 'v1'}, 'k2', 'v2') raw expr : map_insert(map(array('k1'), array('v1')), 'k2', 'v2') -checked expr : map_insert(map(array("k1"), array("v1")), "k2", "v2") +checked expr : map_insert(CAST(map(array("k1"), array("v1")) AS Map(String, String) NULL), "k2", "v2") optimized expr : {"k1":"v1", "k2":"v2"} output type : Map(String, String) output domain : {[{"k1"..="k2"}], [{"v1"..="v2"}]} @@ -655,8 +655,8 @@ output : {'k1':'v10', 'k2':'v2'} ast : map_insert(map([a_col, b_col, c_col], [d_col, e_col, f_col]), 'k1', 'v10') raw expr : map_insert(map(array(a_col::String, b_col::String, c_col::String), array(d_col::String NULL, e_col::String NULL, f_col::String NULL)), 'k1', 'v10') -checked expr : map_insert(map(array(a_col, b_col, c_col), array(d_col, e_col, f_col)), "k1", CAST("v10" AS String NULL)) -optimized expr : map_insert(map(array(a_col, b_col, c_col), array(d_col, e_col, f_col)), "k1", "v10") +checked expr : map_insert(CAST(map(array(a_col, b_col, c_col), array(d_col, e_col, f_col)) AS Map(String, String NULL) NULL), "k1", CAST("v10" AS String NULL)) +optimized expr : map_insert(CAST(map(array(a_col, b_col, c_col), array(d_col, e_col, f_col)) AS Map(String, String NULL) NULL), "k1", "v10") evaluation: +--------+-------------+-------------+-------------+---------------+----------------------+----------------------+--------------------------------------------+ | | a_col | b_col | c_col | d_col | e_col | f_col | Output | diff --git a/tests/sqllogictests/suites/query/functions/02_0074_function_map.test b/tests/sqllogictests/suites/query/functions/02_0074_function_map.test index ea2b723f3abe..559c2cc9eaec 100644 --- a/tests/sqllogictests/suites/query/functions/02_0074_function_map.test +++ b/tests/sqllogictests/suites/query/functions/02_0074_function_map.test @@ -206,5 +206,13 @@ FROM map_insert_test; {'k5':'v5','k6':'v6','k1':'k100'} {'k5':'v5','k6':'v6','k1':'k100'} {'k1':'k100'} {'k1':'k100'} +query +SELECT map_insert(col_int, 'k1', 100) +FROM map_insert_test; +---- +{'a':10,'b':20,'k1':100} +{'d':40,'e':NULL,'f':50,'k1':100} +{'k1':100} + statement ok DROP DATABASE map_func_test From 0a4711be4ae94985fd40b530899f4140524165a1 Mon Sep 17 00:00:00 2001 From: baishen Date: Tue, 5 Nov 2024 14:29:46 +0800 Subject: [PATCH 7/9] fix --- src/query/functions/src/scalars/map.rs | 221 +++++++------------- src/query/functions/tests/it/scalars/map.rs | 2 - 2 files changed, 77 insertions(+), 146 deletions(-) diff --git a/src/query/functions/src/scalars/map.rs b/src/query/functions/src/scalars/map.rs index 88f8eea3e5e4..d1cde4cd833f 100644 --- a/src/query/functions/src/scalars/map.rs +++ b/src/query/functions/src/scalars/map.rs @@ -16,13 +16,7 @@ use std::collections::HashSet; use std::hash::Hash; use std::sync::Arc; -<<<<<<< HEAD -use databend_common_expression::types::array::ArrayColumn; use databend_common_expression::types::map::KvColumn; -use databend_common_expression::types::map::KvPair; -======= -use databend_common_expression::types::map::KvColumn; ->>>>>>> main use databend_common_expression::types::nullable::NullableDomain; use databend_common_expression::types::AnyType; use databend_common_expression::types::ArgType; @@ -40,21 +34,14 @@ use databend_common_expression::types::SimpleDomain; use databend_common_expression::types::ValueType; use databend_common_expression::vectorize_1_arg; use databend_common_expression::vectorize_with_builder_2_arg; -<<<<<<< HEAD use databend_common_expression::vectorize_with_builder_3_arg; use databend_common_expression::vectorize_with_builder_4_arg; -use databend_common_expression::EvalContext; -======= use databend_common_expression::ColumnBuilder; use databend_common_expression::Function; ->>>>>>> main use databend_common_expression::FunctionDomain; use databend_common_expression::FunctionEval; use databend_common_expression::FunctionRegistry; -<<<<<<< HEAD -======= use databend_common_expression::FunctionSignature; ->>>>>>> main use databend_common_expression::ScalarRef; use databend_common_expression::Value; use databend_common_expression::ValueRef; @@ -363,146 +350,92 @@ pub fn register(registry: &mut FunctionRegistry) { }, ); - registry.register_3_arg_core::, GenericType<1>, MapType, GenericType<1>>, _, _>( - "map_insert", - |_, _, insert_key_domain, insert_value_domain| { - FunctionDomain::Domain(Some(( - insert_key_domain.clone(), - insert_value_domain.clone(), - ))) - }, - |_, key, value, ctx| { - let key_type = &ctx.generics[0]; - if !key_type.is_boolean() - && !key_type.is_string() - && !key_type.is_numeric() - && !key_type.is_decimal() - && !key_type.is_date_or_date_time() - { - ctx.set_error(0, format!("map keys can not be {}", key_type)); - } - - let mut b = ArrayType::create_builder(1, ctx.generics); - b.put_item((key.into_scalar().unwrap(), value.into_scalar().unwrap())); - b.commit_row(); - return Value::Scalar(MapType::build_scalar(b)); - }, - ); - registry.register_3_arg_core::, GenericType<1>>>, GenericType<0>, GenericType<1>, MapType, GenericType<1>>, _, _>( "map_insert", - |_, source_domain, insert_key_domain, insert_value_domain| { - FunctionDomain::Domain(match source_domain.has_null { - true => Some(( - insert_key_domain.clone(), - insert_value_domain.clone(), - )), - false => source_domain.value.as_ref().map(|v| { - let a = v.clone().unwrap(); - (a.0.clone(), a.1.clone()) - }), - }) + |_, map_domain, key_domain, value_domain| { + let domain = map_domain + .value + .as_ref() + .map(|box inner_domain| { + inner_domain + .as_ref() + .map(|(inner_key_domain, inner_value_domain)| (inner_key_domain.merge(key_domain), inner_value_domain.merge(value_domain))) + .unwrap_or((key_domain.clone(), value_domain.clone())) + }); + FunctionDomain::Domain(domain) }, - vectorize_with_builder_3_arg::< - NullableType, GenericType<1>>>, - GenericType<0>, - GenericType<1>, - MapType, GenericType<1>>, - >(|source, key, value, output, ctx| { - match source { - Some(source) => { - output.append_column(&build_new_map(&source, key, value, ctx)); - }, - None => { - let mut b = ArrayType::create_builder(1, ctx.generics); - b.put_item((key.clone(), value.clone())); - b.commit_row(); - output.append_column(&b.build()); - }, - }; - }), - ); - - registry.register_passthrough_nullable_3_arg( - "map_insert", - |_, domain1, key_domain, value_domain| { - FunctionDomain::Domain(match (domain1, key_domain, value_domain) { - (Some((key_domain, val_domain)), insert_key_domain, insert_value_domain) => Some(( - key_domain.merge(insert_key_domain), - val_domain.merge(insert_value_domain), - )), - (None, _, _) => None, - }) - }, - vectorize_with_builder_3_arg::< - MapType, GenericType<1>>, - GenericType<0>, - GenericType<1>, - MapType, GenericType<1>>, - >(|source, key, value, output, ctx| { - // default behavior is to insert new key-value pair, and if the key already exists, update the value. - output.append_column(&build_new_map(&source, key, value, ctx)); - }), + vectorize_with_builder_3_arg::, GenericType<1>>>, GenericType<0>, GenericType<1>, MapType, GenericType<1>>>( + |map, key, val, output, ctx| { + let key_type = &ctx.generics[0]; + if !check_valid_map_key_type(key_type) { + ctx.set_error(output.len(), format!("map keys can not be {}", key_type)); + output.commit_row(); + return; + } + if let Some(map) = map { + for (k, _) in map.iter() { + if k == key { + ctx.set_error(output.len(), format!("map key `{}` duplicte", key)); + output.commit_row(); + return; + } + } + for (k, v) in map.iter() { + output.put_item((k, v)); + } + } + output.put_item((key, val)); + output.commit_row(); + }) ); - // grammar: map_insert(map, insert_key, insert_value, allow_update) - registry.register_passthrough_nullable_4_arg( + registry.register_4_arg_core::, GenericType<1>>>, GenericType<0>, GenericType<1>, BooleanType, MapType, GenericType<1>>, _, _>( "map_insert", - |_, domain1, key_domain, value_domain, _| { - FunctionDomain::Domain(match (domain1, key_domain, value_domain) { - (Some((key_domain, val_domain)), insert_key_domain, insert_value_domain) => Some(( - key_domain.merge(insert_key_domain), - val_domain.merge(insert_value_domain), - )), - (None, _, _) => None, - }) + |_, map_domain, key_domain, value_domain, _| { + let domain = map_domain + .value + .as_ref() + .map(|box inner_domain| { + inner_domain + .as_ref() + .map(|(inner_key_domain, inner_value_domain)| (inner_key_domain.merge(key_domain), inner_value_domain.merge(value_domain))) + .unwrap_or((key_domain.clone(), value_domain.clone())) + }); + FunctionDomain::Domain(domain) }, - vectorize_with_builder_4_arg::< - MapType, GenericType<1>>, - GenericType<0>, - GenericType<1>, - BooleanType, - MapType, GenericType<1>>, - >(|source, key, value, allow_update, output, ctx| { - let duplicate_key = source.iter().any(|(k, _)| k == key); - // if duplicate_key is true and allow_update is false, return the original map - if duplicate_key && !allow_update { - let mut new_builder = ArrayType::create_builder(source.len(), ctx.generics); - source - .iter() - .for_each(|(k, v)| new_builder.put_item((k.clone(), v.clone()))); - new_builder.commit_row(); - output.append_column(&new_builder.build()); - return; - } - - output.append_column(&build_new_map(&source, key, value, ctx)); - }), + vectorize_with_builder_4_arg::, GenericType<1>>>, GenericType<0>, GenericType<1>, BooleanType, MapType, GenericType<1>>>( + |map, key, val, update_flag, output, ctx| { + let key_type = &ctx.generics[0]; + if !check_valid_map_key_type(key_type) { + ctx.set_error(output.len(), format!("map keys can not be {}", key_type)); + output.commit_row(); + return; + } + let mut is_duplicate = false; + if let Some(map) = map { + for (k, _) in map.iter() { + if k == key && !update_flag { + ctx.set_error(output.len(), format!("map key `{}` duplicte", key)); + output.commit_row(); + return; + } + } + for (k, v) in map.iter() { + if k == key { + is_duplicate = true; + output.put_item((k, val.clone())); + } else { + output.put_item((k, v)); + } + } + } + if !is_duplicate { + output.put_item((key, val)); + } + output.commit_row(); + }) ); - fn build_new_map( - source: &KvColumn, GenericType<1>>, - insert_key: ScalarRef, - insert_value: ScalarRef, - ctx: &EvalContext, - ) -> ArrayColumn, GenericType<1>>> { - let duplicate_key = source.iter().any(|(k, _)| k == insert_key); - let mut new_map = ArrayType::create_builder(source.len() + 1, ctx.generics); - for (k, v) in source.iter() { - if k == insert_key { - new_map.put_item((k.clone(), insert_value.clone())); - continue; - } - new_map.put_item((k.clone(), v.clone())); - } - if !duplicate_key { - new_map.put_item((insert_key.clone(), insert_value.clone())); - } - new_map.commit_row(); - - new_map.build() - } - registry.register_function_factory("map_pick", |_, args_type: &[DataType]| { let return_type = check_map_arg_types(args_type)?; Some(Arc::new(Function { diff --git a/src/query/functions/tests/it/scalars/map.rs b/src/query/functions/tests/it/scalars/map.rs index 5fcf29b37163..b7714227249d 100644 --- a/src/query/functions/tests/it/scalars/map.rs +++ b/src/query/functions/tests/it/scalars/map.rs @@ -397,7 +397,6 @@ fn test_map_pick(file: &mut impl Write) { run_ast(file, "map_pick({}, 'a', 'b')", &[]); run_ast(file, "map_pick({}, [])", &[]); ->>>>>>> main let columns = [ ("a_col", StringType::from_data(vec!["a", "b", "c"])), ("b_col", StringType::from_data(vec!["d", "e", "f"])), @@ -469,4 +468,3 @@ fn test_map_insert(file: &mut impl Write) { &columns, ); } - From 03e4a7cf95ff5c44c5d558b612d3d6b18d7c17b8 Mon Sep 17 00:00:00 2001 From: baishen Date: Tue, 5 Nov 2024 17:52:21 +0800 Subject: [PATCH 8/9] fix tests --- .../it/scalars/testdata/function_list.txt | 5 +- .../tests/it/scalars/testdata/map.txt | 55 ++++++------------- .../query/functions/02_0074_function_map.test | 28 +++------- 3 files changed, 29 insertions(+), 59 deletions(-) diff --git a/src/query/functions/tests/it/scalars/testdata/function_list.txt b/src/query/functions/tests/it/scalars/testdata/function_list.txt index 88fc0ee3fb6e..89e237d43f5d 100644 --- a/src/query/functions/tests/it/scalars/testdata/function_list.txt +++ b/src/query/functions/tests/it/scalars/testdata/function_list.txt @@ -2546,6 +2546,8 @@ Functions overloads: 1 map_contains_key(Map(T0, T1), T0) :: Boolean 2 map_contains_key(Map(T0, T1) NULL, T0 NULL) :: Boolean NULL 0 map_delete FACTORY +0 map_insert(Map(T0, T1) NULL, T0, T1) :: Map(T0, T1) +1 map_insert(Map(T0, T1) NULL, T0, T1, Boolean) :: Map(T0, T1) 0 map_keys(Map(Nothing)) :: Array(Nothing) 1 map_keys(Map(T0, T1)) :: Array(T0) 2 map_keys(Map(T0, T1) NULL) :: Array(T0) NULL @@ -2553,9 +2555,6 @@ Functions overloads: 0 map_size(Map(Nothing)) :: UInt8 1 map_size(Map(T0, T1)) :: UInt64 2 map_size(Map(T0, T1) NULL) :: UInt64 NULL -0 map_insert(Map(Nothing), T0, T1) :: Map(Nothing, T1) -1 map_insert(Map(T0, T1), T0, T2) :: Map(T0, T2) -2 map_insert(Map(T0, T1) NULL, T0 NULL, T2 NULL) :: Map(T0, T2) NULL 0 map_values(Map(Nothing)) :: Array(Nothing) 1 map_values(Map(T0, T1)) :: Array(T1) 2 map_values(Map(T0, T1) NULL) :: Array(T1) NULL diff --git a/src/query/functions/tests/it/scalars/testdata/map.txt b/src/query/functions/tests/it/scalars/testdata/map.txt index c80922a71a0b..27de73fb71e7 100644 --- a/src/query/functions/tests/it/scalars/testdata/map.txt +++ b/src/query/functions/tests/it/scalars/testdata/map.txt @@ -876,7 +876,7 @@ evaluation (internal): ast : map_insert({}, 'k1', 'v1') raw expr : map_insert(map(array(), array()), 'k1', 'v1') -checked expr : map_insert(map(array<>(), array<>()), "k1", "v1") +checked expr : map_insert(CAST(map(array<>(), array<>()) AS Map(String, String) NULL), "k1", "v1") optimized expr : {"k1":"v1"} output type : Map(String, String) output domain : {[{"k1"..="k1"}], [{"v1"..="v1"}]} @@ -892,18 +892,17 @@ output domain : {[{"k1"..="k2"}], [{"v1"..="v2"}]} output : {'k1':'v1', 'k2':'v2'} -ast : map_insert({'k1': 'v1', 'k2': 'v2'}, 'k1', 'v10', false) -raw expr : map_insert(map(array('k1', 'k2'), array('v1', 'v2')), 'k1', 'v10', false) -checked expr : map_insert(map(array("k1", "k2"), array("v1", "v2")), "k1", "v10", false) -optimized expr : {"k1":"v1", "k2":"v2"} -output type : Map(String, String) -output domain : {[{"k1"..="k2"}], [{"v1"..="v2"}]} -output : {'k1':'v1', 'k2':'v2'} +error: + --> SQL:1:1 + | +1 | map_insert({'k1': 'v1', 'k2': 'v2'}, 'k1', 'v10', false) + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ map key `'k1'` duplicte while evaluating function `map_insert({'k1':'v1', 'k2':'v2'}, 'k1', 'v10', false)` in expr `map_insert(CAST(map(array('k1', 'k2'), array('v1', 'v2')) AS Map(String, String) NULL), 'k1', 'v10', false)` + ast : map_insert({'k1': 'v1', 'k2': 'v2'}, 'k1', 'v10', true) raw expr : map_insert(map(array('k1', 'k2'), array('v1', 'v2')), 'k1', 'v10', true) -checked expr : map_insert(map(array("k1", "k2"), array("v1", "v2")), "k1", "v10", true) +checked expr : map_insert(CAST(map(array("k1", "k2"), array("v1", "v2")) AS Map(String, String) NULL), "k1", "v10", true) optimized expr : {"k1":"v10", "k2":"v2"} output type : Map(String, String) output domain : {[{"k1"..="k2"}], [{"v10"..="v2"}]} @@ -940,8 +939,8 @@ evaluation (internal): ast : map_insert(map([a_col, b_col, c_col], [d_col, e_col, f_col]), 'a', 'v10', true) raw expr : map_insert(map(array(a_col::String, b_col::String, c_col::String), array(d_col::String NULL, e_col::String NULL, f_col::String NULL)), 'a', 'v10', true) -checked expr : map_insert(map(array(a_col, b_col, c_col), array(d_col, e_col, f_col)), "a", CAST("v10" AS String NULL), true) -optimized expr : map_insert(map(array(a_col, b_col, c_col), array(d_col, e_col, f_col)), "a", "v10", true) +checked expr : map_insert(CAST(map(array(a_col, b_col, c_col), array(d_col, e_col, f_col)) AS Map(String, String NULL) NULL), "a", CAST("v10" AS String NULL), true) +optimized expr : map_insert(CAST(map(array(a_col, b_col, c_col), array(d_col, e_col, f_col)) AS Map(String, String NULL) NULL), "a", "v10", true) evaluation: +--------+-------------+-------------+-------------+---------------+----------------------+----------------------+-------------------------------------------+ | | a_col | b_col | c_col | d_col | e_col | f_col | Output | @@ -966,29 +965,11 @@ evaluation (internal): +--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -ast : map_insert(map([a_col, b_col, c_col], [d_col, e_col, f_col]), 'a', 'v10', false) -raw expr : map_insert(map(array(a_col::String, b_col::String, c_col::String), array(d_col::String NULL, e_col::String NULL, f_col::String NULL)), 'a', 'v10', false) -checked expr : map_insert(map(array(a_col, b_col, c_col), array(d_col, e_col, f_col)), "a", CAST("v10" AS String NULL), false) -optimized expr : map_insert(map(array(a_col, b_col, c_col), array(d_col, e_col, f_col)), "a", "v10", false) -evaluation: -+--------+-------------+-------------+-------------+---------------+----------------------+----------------------+-------------------------------------------+ -| | a_col | b_col | c_col | d_col | e_col | f_col | Output | -+--------+-------------+-------------+-------------+---------------+----------------------+----------------------+-------------------------------------------+ -| Type | String | String | String | String NULL | String NULL | String NULL | Map(String, String NULL) | -| Domain | {"a"..="c"} | {"d"..="f"} | {"x"..="z"} | {"v1"..="v3"} | {""..="v5"} ∪ {NULL} | {""..="v7"} ∪ {NULL} | Unknown | -| Row 0 | 'a' | 'd' | 'x' | 'v1' | 'v4' | 'v6' | {'a':'v1', 'd':'v4', 'x':'v6'} | -| Row 1 | 'b' | 'e' | 'y' | 'v2' | 'v5' | NULL | {'b':'v2', 'e':'v5', 'y':NULL, 'a':'v10'} | -| Row 2 | 'c' | 'f' | 'z' | 'v3' | NULL | 'v7' | {'c':'v3', 'f':NULL, 'z':'v7', 'a':'v10'} | -+--------+-------------+-------------+-------------+---------------+----------------------+----------------------+-------------------------------------------+ -evaluation (internal): -+--------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| a_col | StringColumn { data: 0x616263, offsets: [0, 1, 2, 3] } | -| b_col | StringColumn { data: 0x646566, offsets: [0, 1, 2, 3] } | -| c_col | StringColumn { data: 0x78797a, offsets: [0, 1, 2, 3] } | -| d_col | NullableColumn { column: StringColumn { data: 0x763176327633, offsets: [0, 2, 4, 6] }, validity: [0b_____111] } | -| e_col | NullableColumn { column: StringColumn { data: 0x76347635, offsets: [0, 2, 4, 4] }, validity: [0b_____011] } | -| f_col | NullableColumn { column: StringColumn { data: 0x76367637, offsets: [0, 2, 2, 4] }, validity: [0b_____101] } | -| Output | ArrayColumn { values: Tuple([StringColumn { data: 0x6164786265796163667a61, offsets: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] }, NullableColumn { column: StringColumn { data: 0x7631763476367632763576313076337637763130, offsets: [0, 2, 4, 6, 8, 10, 10, 13, 15, 15, 17, 20] }, validity: [0b11011111, 0b_____110] }]), offsets: [0, 3, 7, 11] } | -+--------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +error: + --> SQL:1:1 + | +1 | map_insert(map([a_col, b_col, c_col], [d_col, e_col, f_col]), 'a', 'v10', false) + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ map key `'a'` duplicte while evaluating function `map_insert({'a':'v1', 'd':'v4', 'x':'v6'}, 'a', 'v10', false)` in expr `map_insert(CAST(map(array(a_col, b_col, c_col), array(d_col, e_col, f_col)) AS Map(String, String NULL) NULL), 'a', CAST('v10' AS String NULL), false)` + + + diff --git a/tests/sqllogictests/suites/query/functions/02_0074_function_map.test b/tests/sqllogictests/suites/query/functions/02_0074_function_map.test index 829753c86526..e29e03a18fc7 100644 --- a/tests/sqllogictests/suites/query/functions/02_0074_function_map.test +++ b/tests/sqllogictests/suites/query/functions/02_0074_function_map.test @@ -279,10 +279,8 @@ SELECT map_insert({'k1': 'v1', 'k2': 'v2'}, 'k3', NULL) ---- {'k1':'v1','k2':'v2','k3':NULL} -query +statement error 1006 SELECT map_insert({'k1': 'v1', 'k2': 'v2'}, 'k1', 'new_v1') ----- -{'k1':'new_v1','k2':'v2'} query SELECT map_insert({}, 'k1', 'v1') @@ -294,10 +292,11 @@ SELECT map_insert({'k1': 'v1', 'k2': 'v2'}, 'k2', 'v3', true) ---- {'k1':'v1','k2':'v3'} -query +statement error 1006 SELECT map_insert({'k1': 'v1', 'k2': 'v2'}, 'k2', 'v3', false) ----- -{'k1':'v1','k2':'v2'} + +statement ok +drop table if exists map_insert_test all statement ok CREATE TABLE map_insert_test(col_str Map(String, String Null) Not Null, col_int Map(String, Int Null) Null); @@ -306,20 +305,11 @@ statement ok INSERT INTO map_insert_test VALUES ({'k1':'v1','k2':'v2','k3':null},{'a':10,'b':20}), ({'k5':'v5','k6':'v6'}, {'d':40,'e':null,'f':50}), ({}, NULL); query -SELECT map_insert(col_str, 'k1', 'k100', true) as update_str, map_insert(col_str, 'k1', 'k100', false) as origin_str -FROM map_insert_test; ----- -{'k1':'k100','k2':'v2','k3':NULL} {'k1':'v1','k2':'v2','k3':NULL} -{'k5':'v5','k6':'v6','k1':'k100'} {'k5':'v5','k6':'v6','k1':'k100'} -{'k1':'k100'} {'k1':'k100'} - -query -SELECT map_insert(col_int, 'k1', 100) -FROM map_insert_test; +SELECT map_insert(col_str, 'k1', 'k100', true), map_insert(col_int, 'c', 100, false) FROM map_insert_test; ---- -{'a':10,'b':20,'k1':100} -{'d':40,'e':NULL,'f':50,'k1':100} -{'k1':100} +{'k1':'k100','k2':'v2','k3':NULL} {'a':10,'b':20,'c':100} +{'k5':'v5','k6':'v6','k1':'k100'} {'d':40,'e':NULL,'f':50,'c':100} +{'k1':'k100'} {'c':100} # Test map_filter query T From b531703bfd9a9c984c9b94501d4022952959e68d Mon Sep 17 00:00:00 2001 From: baishen Date: Tue, 5 Nov 2024 19:14:19 +0800 Subject: [PATCH 9/9] fix typos --- src/query/functions/src/scalars/map.rs | 4 ++-- src/query/functions/tests/it/scalars/testdata/map.txt | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/query/functions/src/scalars/map.rs b/src/query/functions/src/scalars/map.rs index d1cde4cd833f..449cb7efbc29 100644 --- a/src/query/functions/src/scalars/map.rs +++ b/src/query/functions/src/scalars/map.rs @@ -375,7 +375,7 @@ pub fn register(registry: &mut FunctionRegistry) { if let Some(map) = map { for (k, _) in map.iter() { if k == key { - ctx.set_error(output.len(), format!("map key `{}` duplicte", key)); + ctx.set_error(output.len(), format!("map key `{}` duplicate", key)); output.commit_row(); return; } @@ -415,7 +415,7 @@ pub fn register(registry: &mut FunctionRegistry) { if let Some(map) = map { for (k, _) in map.iter() { if k == key && !update_flag { - ctx.set_error(output.len(), format!("map key `{}` duplicte", key)); + ctx.set_error(output.len(), format!("map key `{}` duplicate", key)); output.commit_row(); return; } diff --git a/src/query/functions/tests/it/scalars/testdata/map.txt b/src/query/functions/tests/it/scalars/testdata/map.txt index 27de73fb71e7..f8618c82aa66 100644 --- a/src/query/functions/tests/it/scalars/testdata/map.txt +++ b/src/query/functions/tests/it/scalars/testdata/map.txt @@ -896,7 +896,7 @@ error: --> SQL:1:1 | 1 | map_insert({'k1': 'v1', 'k2': 'v2'}, 'k1', 'v10', false) - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ map key `'k1'` duplicte while evaluating function `map_insert({'k1':'v1', 'k2':'v2'}, 'k1', 'v10', false)` in expr `map_insert(CAST(map(array('k1', 'k2'), array('v1', 'v2')) AS Map(String, String) NULL), 'k1', 'v10', false)` + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ map key `'k1'` duplicate while evaluating function `map_insert({'k1':'v1', 'k2':'v2'}, 'k1', 'v10', false)` in expr `map_insert(CAST(map(array('k1', 'k2'), array('v1', 'v2')) AS Map(String, String) NULL), 'k1', 'v10', false)` @@ -969,7 +969,7 @@ error: --> SQL:1:1 | 1 | map_insert(map([a_col, b_col, c_col], [d_col, e_col, f_col]), 'a', 'v10', false) - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ map key `'a'` duplicte while evaluating function `map_insert({'a':'v1', 'd':'v4', 'x':'v6'}, 'a', 'v10', false)` in expr `map_insert(CAST(map(array(a_col, b_col, c_col), array(d_col, e_col, f_col)) AS Map(String, String NULL) NULL), 'a', CAST('v10' AS String NULL), false)` + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ map key `'a'` duplicate while evaluating function `map_insert({'a':'v1', 'd':'v4', 'x':'v6'}, 'a', 'v10', false)` in expr `map_insert(CAST(map(array(a_col, b_col, c_col), array(d_col, e_col, f_col)) AS Map(String, String NULL) NULL), 'a', CAST('v10' AS String NULL), false)`