diff --git a/src/query/functions/src/scalars/map.rs b/src/query/functions/src/scalars/map.rs index d7a792b8d774..a71e2aadda59 100644 --- a/src/query/functions/src/scalars/map.rs +++ b/src/query/functions/src/scalars/map.rs @@ -61,11 +61,7 @@ pub fn register(registry: &mut FunctionRegistry) { vectorize_with_builder_2_arg::>, ArrayType>, MapType, GenericType<1>>>( |keys, vals, output, ctx| { let key_type = &ctx.generics[0]; - if !key_type.is_boolean() - && !key_type.is_string() - && !key_type.is_numeric() - && !key_type.is_decimal() - && !key_type.is_date_or_date_time() { + if !check_valid_map_key_type(key_type) { ctx.set_error(output.len(), format!("map keys can not be {}", key_type)); } else if keys.len() != vals.len() { ctx.set_error(output.len(), format!( @@ -241,43 +237,7 @@ pub fn register(registry: &mut FunctionRegistry) { ); registry.register_function_factory("map_delete", |_, args_type| { - if args_type.len() < 2 { - return None; - } - - let map_key_type = match args_type[0].remove_nullable() { - DataType::Map(box DataType::Tuple(type_tuple)) if type_tuple.len() == 2 => { - Some(type_tuple[0].clone()) - } - DataType::EmptyMap => None, - _ => return None, - }; - - if let Some(map_key_type) = map_key_type { - for arg_type in args_type.iter().skip(1) { - if arg_type != &map_key_type { - return None; - } - } - } else { - let key_type = &args_type[1]; - if !key_type.is_boolean() - && !key_type.is_string() - && !key_type.is_numeric() - && !key_type.is_decimal() - && !key_type.is_date_or_date_time() - { - return None; - } - for arg_type in args_type.iter().skip(2) { - if arg_type != key_type { - return None; - } - } - } - - let return_type = args_type[0].clone(); - + let return_type = check_map_arg_types(args_type)?; Some(Arc::new(Function { signature: FunctionSignature { name: "map_delete".to_string(), @@ -297,19 +257,19 @@ pub fn register(registry: &mut FunctionRegistry) { let mut output_map_builder = ColumnBuilder::with_capacity(&return_type, input_length.unwrap_or(1)); + let mut delete_key_list = HashSet::new(); for idx in 0..(input_length.unwrap_or(1)) { - let input_map_sref = match &args[0] { + let input_map = match &args[0] { ValueRef::Scalar(map) => map.clone(), ValueRef::Column(map) => unsafe { map.index_unchecked(idx) }, }; - match &input_map_sref { + match &input_map { ScalarRef::Null | ScalarRef::EmptyMap => { output_map_builder.push_default(); } ScalarRef::Map(col) => { - let mut delete_key_list = HashSet::new(); - + delete_key_list.clear(); for input_key_item in args.iter().skip(1) { let input_key = match &input_key_item { ValueRef::Scalar(scalar) => scalar.clone(), @@ -317,11 +277,27 @@ pub fn register(registry: &mut FunctionRegistry) { col.index_unchecked(idx) }, }; - - delete_key_list.insert(input_key.to_owned()); + match input_key { + ScalarRef::EmptyArray | ScalarRef::Null => {} + ScalarRef::Array(arr_col) => { + for arr_key in arr_col.iter() { + if arr_key == ScalarRef::Null { + continue; + } + delete_key_list.insert(arr_key.to_owned()); + } + } + _ => { + delete_key_list.insert(input_key.to_owned()); + } + } + } + if delete_key_list.is_empty() { + output_map_builder.push(input_map); + continue; } - let inner_builder_type = match input_map_sref.infer_data_type() { + let inner_builder_type = match input_map.infer_data_type() { DataType::Map(box typ) => typ, _ => unreachable!(), }; @@ -330,7 +306,7 @@ pub fn register(registry: &mut FunctionRegistry) { ColumnBuilder::with_capacity(&inner_builder_type, col.len()); let input_map: KvColumn = - MapType::try_downcast_scalar(&input_map_sref).unwrap(); + MapType::try_downcast_scalar(&input_map).unwrap(); input_map.iter().for_each(|(map_key, map_value)| { if !delete_key_list.contains(&map_key.to_owned()) { @@ -371,4 +347,167 @@ pub fn register(registry: &mut FunctionRegistry) { .any(|(k, _)| k == key) }, ); + + registry.register_function_factory("map_pick", |_, args_type: &[DataType]| { + let return_type = check_map_arg_types(args_type)?; + Some(Arc::new(Function { + signature: FunctionSignature { + name: "map_pick".to_string(), + args_type: args_type.to_vec(), + return_type: args_type[0].clone(), + }, + eval: FunctionEval::Scalar { + calc_domain: Box::new(|_, args_domain| { + FunctionDomain::Domain(args_domain[0].clone()) + }), + eval: Box::new(move |args, _ctx| { + let input_length = args.iter().find_map(|arg| match arg { + ValueRef::Column(col) => Some(col.len()), + _ => None, + }); + + let mut output_map_builder = + ColumnBuilder::with_capacity(&return_type, input_length.unwrap_or(1)); + + let mut pick_key_list = HashSet::new(); + for idx in 0..(input_length.unwrap_or(1)) { + let input_map = match &args[0] { + ValueRef::Scalar(map) => map.clone(), + ValueRef::Column(map) => unsafe { map.index_unchecked(idx) }, + }; + + match &input_map { + ScalarRef::Null | ScalarRef::EmptyMap => { + output_map_builder.push_default(); + } + ScalarRef::Map(col) => { + pick_key_list.clear(); + for input_key_item in args.iter().skip(1) { + let input_key = match &input_key_item { + ValueRef::Scalar(scalar) => scalar.clone(), + ValueRef::Column(col) => unsafe { + col.index_unchecked(idx) + }, + }; + match input_key { + ScalarRef::EmptyArray | ScalarRef::Null => {} + ScalarRef::Array(arr_col) => { + for arr_key in arr_col.iter() { + if arr_key == ScalarRef::Null { + continue; + } + pick_key_list.insert(arr_key.to_owned()); + } + } + _ => { + pick_key_list.insert(input_key.to_owned()); + } + } + } + if pick_key_list.is_empty() { + output_map_builder.push_default(); + continue; + } + + let inner_builder_type = match input_map.infer_data_type() { + DataType::Map(box typ) => typ, + _ => unreachable!(), + }; + + let mut filtered_kv_builder = + ColumnBuilder::with_capacity(&inner_builder_type, col.len()); + + let input_map: KvColumn = + MapType::try_downcast_scalar(&input_map).unwrap(); + + input_map.iter().for_each(|(map_key, map_value)| { + if pick_key_list.contains(&map_key.to_owned()) { + filtered_kv_builder.push(ScalarRef::Tuple(vec![ + map_key.clone(), + map_value.clone(), + ])); + } + }); + output_map_builder + .push(ScalarRef::Map(filtered_kv_builder.build())); + } + _ => unreachable!(), + } + } + + match input_length { + Some(_) => Value::Column(output_map_builder.build()), + None => Value::Scalar(output_map_builder.build_scalar()), + } + }), + }, + })) + }); +} + +// Check map function arg types +// 1. The first arg must be a Map or EmptyMap. +// 2. The second arg can be an Array or EmptyArray. +// 3. Multiple args with same key type is also valid. +fn check_map_arg_types(args_type: &[DataType]) -> Option { + if args_type.len() < 2 { + return None; + } + + let map_key_type = match args_type[0].remove_nullable() { + DataType::Map(box DataType::Tuple(type_tuple)) if type_tuple.len() == 2 => { + Some(type_tuple[0].clone()) + } + DataType::EmptyMap => None, + _ => return None, + }; + + // the second argument can be an array of keys. + let (is_array, array_key_type) = match args_type[1].remove_nullable() { + DataType::Array(box key_type) => (true, Some(key_type.remove_nullable())), + DataType::EmptyArray => (true, None), + _ => (false, None), + }; + if is_array && args_type.len() != 2 { + return None; + } + if let Some(map_key_type) = map_key_type { + if is_array { + if let Some(array_key_type) = array_key_type { + if array_key_type != DataType::Null && array_key_type != map_key_type { + return None; + } + } + } else { + for arg_type in args_type.iter().skip(1) { + let arg_type = arg_type.remove_nullable(); + if arg_type != DataType::Null && arg_type != map_key_type { + return None; + } + } + } + } else if is_array { + if let Some(array_key_type) = array_key_type { + if array_key_type != DataType::Null && !check_valid_map_key_type(&array_key_type) { + return None; + } + } + } else { + for arg_type in args_type.iter().skip(1) { + let arg_type = arg_type.remove_nullable(); + if arg_type != DataType::Null && !check_valid_map_key_type(&arg_type) { + return None; + } + } + } + let return_type = args_type[0].clone(); + Some(return_type) +} + +fn check_valid_map_key_type(key_type: &DataType) -> bool { + key_type.is_boolean() + || key_type.is_string() + || key_type.is_numeric() + || key_type.is_decimal() + || key_type.is_date_or_date_time() } diff --git a/src/query/functions/tests/it/scalars/map.rs b/src/query/functions/tests/it/scalars/map.rs index 75eb38b2ad4d..7b2f0f0fd6d4 100644 --- a/src/query/functions/tests/it/scalars/map.rs +++ b/src/query/functions/tests/it/scalars/map.rs @@ -33,6 +33,7 @@ fn test_map() { test_map_cat(file); test_map_delete(file); test_map_contains_key(file); + test_map_pick(file); } fn test_map_cat(file: &mut impl Write) { @@ -296,6 +297,11 @@ fn test_map_delete(file: &mut impl Write) { "map_delete({'k1': 'v1', 'k2': 'v2', 'k3': 'v3', 'k4': 'v4'}, 'k3', 'k2')", &[], ); + run_ast( + file, + "map_delete({'k1': 'v1', 'k2': 'v2', 'k3': 'v3', 'k4': 'v4'}, ['k3', 'k2'])", + &[], + ); // Deleting keys from a nested map let columns = [ @@ -381,3 +387,35 @@ fn test_map_delete(file: &mut impl Write) { &columns, ); } + +fn test_map_pick(file: &mut impl Write) { + run_ast(file, "map_pick({'a':1,'b':2,'c':3}, 'a', 'b')", &[]); + run_ast(file, "map_pick({'a':1,'b':2,'c':3}, ['a', 'b'])", &[]); + run_ast(file, "map_pick({'a':1,'b':2,'c':3}, [])", &[]); + run_ast(file, "map_pick({1:'a',2:'b',3:'c'}, 1, 3)", &[]); + run_ast(file, "map_pick({}, 'a', 'b')", &[]); + run_ast(file, "map_pick({}, [])", &[]); + + let columns = [ + ("a_col", StringType::from_data(vec!["a", "b", "c"])), + ("b_col", StringType::from_data(vec!["d", "e", "f"])), + ("c_col", StringType::from_data(vec!["x", "y", "z"])), + ( + "d_col", + StringType::from_data_with_validity(vec!["v1", "v2", "v3"], vec![true, true, true]), + ), + ( + "e_col", + StringType::from_data_with_validity(vec!["v4", "v5", ""], vec![true, true, false]), + ), + ( + "f_col", + StringType::from_data_with_validity(vec!["v6", "", "v7"], vec![true, false, true]), + ), + ]; + run_ast( + file, + "map_pick(map([a_col, b_col, c_col], [d_col, e_col, f_col]), 'a', 'b')", + &columns, + ); +} diff --git a/src/query/functions/tests/it/scalars/testdata/function_list.txt b/src/query/functions/tests/it/scalars/testdata/function_list.txt index 7d4cf1b16a97..88040668a612 100644 --- a/src/query/functions/tests/it/scalars/testdata/function_list.txt +++ b/src/query/functions/tests/it/scalars/testdata/function_list.txt @@ -2547,6 +2547,7 @@ Functions overloads: 0 map_keys(Map(Nothing)) :: Array(Nothing) 1 map_keys(Map(T0, T1)) :: Array(T0) 2 map_keys(Map(T0, T1) NULL) :: Array(T0) NULL +0 map_pick FACTORY 0 map_size(Map(Nothing)) :: UInt8 1 map_size(Map(T0, T1)) :: UInt64 2 map_size(Map(T0, T1) NULL) :: UInt64 NULL diff --git a/src/query/functions/tests/it/scalars/testdata/map.txt b/src/query/functions/tests/it/scalars/testdata/map.txt index e5784d3f90de..e8c3d262cae0 100644 --- a/src/query/functions/tests/it/scalars/testdata/map.txt +++ b/src/query/functions/tests/it/scalars/testdata/map.txt @@ -553,12 +553,13 @@ error: -error: - --> SQL:1:1 - | -1 | map_delete({}, NULL, NULL) - | ^^^^^^^^^^^^^^^^^^^^^^^^^^ no function matches signature `map_delete(Map(Nothing), NULL, NULL)`, you might need to add explicit type casts. - +ast : map_delete({}, NULL, NULL) +raw expr : map_delete(map(array(), array()), NULL, NULL) +checked expr : map_delete(map(array<>(), array<>()), NULL, NULL) +optimized expr : {} :: Map(Nothing) +output type : Map(Nothing) +output domain : {} +output : {} error: @@ -578,6 +579,15 @@ output domain : {[{"k1"..="k4"}], [{"v1"..="v4"}]} output : {'k1':'v1', 'k4':'v4'} +ast : map_delete({'k1': 'v1', 'k2': 'v2', 'k3': 'v3', 'k4': 'v4'}, ['k3', 'k2']) +raw expr : map_delete(map(array('k1', 'k2', 'k3', 'k4'), array('v1', 'v2', 'v3', 'v4')), array('k3', 'k2')) +checked expr : map_delete(map(array("k1", "k2", "k3", "k4"), array("v1", "v2", "v3", "v4")), array("k3", "k2")) +optimized expr : {"k1":"v1", "k4":"v4"} +output type : Map(String, String) +output domain : {[{"k1"..="k4"}], [{"v1"..="v4"}]} +output : {'k1':'v1', 'k4':'v4'} + + ast : map_delete(map([a_col, b_col], [d_col, e_col]), 'a_k2', 'b_k3') raw expr : map_delete(map(array(a_col::String, b_col::String), array(d_col::String, e_col::String)), 'a_k2', 'b_k3') checked expr : map_delete(map(array(a_col, b_col), array(d_col, e_col)), "a_k2", "b_k3") @@ -783,3 +793,84 @@ evaluation (internal): +--------+-----------------------------------------------------------------------------------------------------------------+ +ast : map_pick({'a':1,'b':2,'c':3}, 'a', 'b') +raw expr : map_pick(map(array('a', 'b', 'c'), array(1, 2, 3)), 'a', 'b') +checked expr : map_pick(map(array("a", "b", "c"), array(1_u8, 2_u8, 3_u8)), "a", "b") +optimized expr : {"a":1_u8, "b":2_u8} +output type : Map(String, UInt8) +output domain : {[{"a"..="b"}], [{1..=2}]} +output : {'a':1, 'b':2} + + +ast : map_pick({'a':1,'b':2,'c':3}, ['a', 'b']) +raw expr : map_pick(map(array('a', 'b', 'c'), array(1, 2, 3)), array('a', 'b')) +checked expr : map_pick(map(array("a", "b", "c"), array(1_u8, 2_u8, 3_u8)), array("a", "b")) +optimized expr : {"a":1_u8, "b":2_u8} +output type : Map(String, UInt8) +output domain : {[{"a"..="b"}], [{1..=2}]} +output : {'a':1, 'b':2} + + +ast : map_pick({'a':1,'b':2,'c':3}, []) +raw expr : map_pick(map(array('a', 'b', 'c'), array(1, 2, 3)), array()) +checked expr : map_pick(map(array("a", "b", "c"), array(1_u8, 2_u8, 3_u8)), array<>()) +optimized expr : {} +output type : Map(String, UInt8) +output domain : {} +output : {} + + +ast : map_pick({1:'a',2:'b',3:'c'}, 1, 3) +raw expr : map_pick(map(array(1, 2, 3), array('a', 'b', 'c')), 1, 3) +checked expr : map_pick(map(array(1_u8, 2_u8, 3_u8), array("a", "b", "c")), 1_u8, 3_u8) +optimized expr : {1_u8:"a", 3_u8:"c"} +output type : Map(UInt8, String) +output domain : {[{1..=3}], [{"a"..="c"}]} +output : {1:'a', 3:'c'} + + +ast : map_pick({}, 'a', 'b') +raw expr : map_pick(map(array(), array()), 'a', 'b') +checked expr : map_pick(map(array<>(), array<>()), "a", "b") +optimized expr : {} :: Map(Nothing) +output type : Map(Nothing) +output domain : {} +output : {} + + +ast : map_pick({}, []) +raw expr : map_pick(map(array(), array()), array()) +checked expr : map_pick(map(array<>(), array<>()), array<>()) +optimized expr : {} :: Map(Nothing) +output type : Map(Nothing) +output domain : {} +output : {} + + +ast : map_pick(map([a_col, b_col, c_col], [d_col, e_col, f_col]), 'a', 'b') +raw expr : map_pick(map(array(a_col::String, b_col::String, c_col::String), array(d_col::String NULL, e_col::String NULL, f_col::String NULL)), 'a', 'b') +checked expr : map_pick(map(array(a_col, b_col, c_col), array(d_col, e_col, f_col)), "a", "b") +evaluation: ++--------+-------------+-------------+-------------+---------------+----------------------+----------------------+--------------------------+ +| | a_col | b_col | c_col | d_col | e_col | f_col | Output | ++--------+-------------+-------------+-------------+---------------+----------------------+----------------------+--------------------------+ +| Type | String | String | String | String NULL | String NULL | String NULL | Map(String, String NULL) | +| Domain | {"a"..="c"} | {"d"..="f"} | {"x"..="z"} | {"v1"..="v3"} | {""..="v5"} ∪ {NULL} | {""..="v7"} ∪ {NULL} | Unknown | +| Row 0 | 'a' | 'd' | 'x' | 'v1' | 'v4' | 'v6' | {'a':'v1'} | +| Row 1 | 'b' | 'e' | 'y' | 'v2' | 'v5' | NULL | {'b':'v2'} | +| Row 2 | 'c' | 'f' | 'z' | 'v3' | NULL | 'v7' | {} | ++--------+-------------+-------------+-------------+---------------+----------------------+----------------------+--------------------------+ +evaluation (internal): ++--------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| a_col | StringColumn { data: 0x616263, offsets: [0, 1, 2, 3] } | +| b_col | StringColumn { data: 0x646566, offsets: [0, 1, 2, 3] } | +| c_col | StringColumn { data: 0x78797a, offsets: [0, 1, 2, 3] } | +| d_col | NullableColumn { column: StringColumn { data: 0x763176327633, offsets: [0, 2, 4, 6] }, validity: [0b_____111] } | +| e_col | NullableColumn { column: StringColumn { data: 0x76347635, offsets: [0, 2, 4, 4] }, validity: [0b_____011] } | +| f_col | NullableColumn { column: StringColumn { data: 0x76367637, offsets: [0, 2, 2, 4] }, validity: [0b_____101] } | +| Output | ArrayColumn { values: Tuple([StringColumn { data: 0x6162, offsets: [0, 1, 2] }, NullableColumn { column: StringColumn { data: 0x76317632, offsets: [0, 2, 4] }, validity: [0b______11] }]), offsets: [0, 1, 2, 2] } | ++--------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + + diff --git a/tests/sqllogictests/suites/query/functions/02_0074_function_map.test b/tests/sqllogictests/suites/query/functions/02_0074_function_map.test index 7237331765fa..5fc29b96f1f8 100644 --- a/tests/sqllogictests/suites/query/functions/02_0074_function_map.test +++ b/tests/sqllogictests/suites/query/functions/02_0074_function_map.test @@ -144,6 +144,15 @@ SELECT MAP_DELETE( ---- {'k1':'v1','k4':'v4'} +query +SELECT MAP_DELETE({'k1': 'v1', 'k2': 'v2', 'k3': 'v3', 'k4': 'v4'}, ['k3', 'k2', 'k10']) +---- +{'k1':'v1','k4':'v4'} + +query +SELECT MAP_DELETE({}, 'k1', 'k2') +---- +{} # Deleting keys from a nested map statement ok @@ -224,6 +233,47 @@ SELECT map_contains_key({'k1': 'v1', 'k2': NULL}, 'k2') ---- 1 +# Test map_pick function +query +SELECT map_pick({'k1': 'v1', 'k2': 'v2', 'k3': null}, 'k1', 'k3') +---- +{'k1':'v1','k3':NULL} + +query +SELECT map_pick({'k1': 'v1', 'k2': 'v2', 'k3': null}, ['k1', 'k3', 'k10']) +---- +{'k1':'v1','k3':NULL} + +query +SELECT map_pick({'k1': 'v1', 'k2': 'v2', 'k3': null}, '') +---- +{} + +query +SELECT map_pick({'k1': 'v1', 'k2': 'v2', 'k3': null}, []) +---- +{} + +query +SELECT map_pick({}, 1, 2) +---- +{} + +statement ok +DROP TABLE IF EXISTS map_pick_test + +statement ok +CREATE TABLE map_pick_test(col_str Map(String, String Null) Not Null, col_int Map(String, Int Null) Null) + +statement ok +INSERT INTO map_pick_test VALUES ({'k1':'v1','k2':'v2','k3':null},{'a':10,'b':20}), ({'k5':'v5','k6':'v6'}, {'d':40,'e':null,'f':50}) + +query +SELECT map_pick(col_str, 'k1', 'k3'), map_pick(col_int, ['a', 'e', 'x']) FROM map_pick_test +---- +{'k1':'v1','k3':NULL} {'a':10} +{} {'e':NULL} + # Test map_filter query T SELECT map_filter({1:0,2:2,3:-1}, (k, v) -> k > v);