From b3bff22e8cbb23f07c52f576a5d20dc6c6e8e612 Mon Sep 17 00:00:00 2001 From: hanxuanliang Date: Thu, 23 May 2024 09:15:31 +0800 Subject: [PATCH] feat(functions): add factory function for map_pick --- src/query/functions/src/scalars/map.rs | 80 +++++++++++++++++++ src/query/functions/tests/it/scalars/map.rs | 7 +- .../tests/it/scalars/testdata/map.txt | 66 +++++---------- .../query/functions/02_0074_function_map.test | 13 +-- 4 files changed, 104 insertions(+), 62 deletions(-) diff --git a/src/query/functions/src/scalars/map.rs b/src/query/functions/src/scalars/map.rs index b2ac194c63992..600ab08dc3e84 100644 --- a/src/query/functions/src/scalars/map.rs +++ b/src/query/functions/src/scalars/map.rs @@ -14,11 +14,16 @@ use std::collections::HashSet; use std::hash::Hash; +use std::sync::Arc; +use databend_common_expression::types::array::ArrayColumnBuilder; +use databend_common_expression::types::map::KvPair; use databend_common_expression::types::nullable::NullableDomain; +use databend_common_expression::types::AnyType; use databend_common_expression::types::ArgType; use databend_common_expression::types::ArrayType; use databend_common_expression::types::BooleanType; +use databend_common_expression::types::DataType; use databend_common_expression::types::EmptyArrayType; use databend_common_expression::types::EmptyMapType; use databend_common_expression::types::GenericType; @@ -27,11 +32,20 @@ use databend_common_expression::types::NullType; use databend_common_expression::types::NullableType; use databend_common_expression::types::NumberType; use databend_common_expression::types::SimpleDomain; +use databend_common_expression::types::ValueType; use databend_common_expression::vectorize_1_arg; use databend_common_expression::vectorize_with_builder_2_arg; +use databend_common_expression::Column; +use databend_common_expression::EvalContext; +use databend_common_expression::Function; use databend_common_expression::FunctionDomain; +use databend_common_expression::FunctionEval; use databend_common_expression::FunctionRegistry; +use databend_common_expression::FunctionSignature; +use databend_common_expression::Scalar; +use databend_common_expression::ScalarRef; use databend_common_expression::Value; +use databend_common_expression::ValueRef; use databend_common_hashtable::StackHashSet; use siphasher::sip128::Hasher128; use siphasher::sip128::SipHasher24; @@ -245,6 +259,72 @@ pub fn register(registry: &mut FunctionRegistry) { }, ); + registry.register_function_factory("map_pick", |_, args_type: &[DataType]| { + if args_type.len() < 2 { + return None; + } + + Some(Arc::new(Function { + signature: FunctionSignature { + name: "map_pick".to_string(), + args_type: args_type.to_vec(), + return_type: args_type[0].clone(), + }, + eval: FunctionEval::Scalar { + calc_domain: Box::new(move |_, _| FunctionDomain::Full), + eval: Box::new(map_pick_fn_vec), + }, + })) + }); + + fn map_pick_fn_vec(args: &[ValueRef], _: &mut EvalContext) -> Value { + let len = args.iter().find_map(|arg| match arg { + ValueRef::Column(col) => Some(col.len()), + _ => None, + }); + + let source_data_type = match args.first().unwrap() { + ValueRef::Scalar(s) => s.infer_data_type(), + ValueRef::Column(c) => c.data_type(), + }; + + let source_map = match &args[0] { + ValueRef::Scalar(ScalarRef::Map(s)) => { + KvPair::, GenericType<1>>::try_downcast_column(s).unwrap() + } + ValueRef::Column(Column::Map(c)) => { + KvPair::, GenericType<1>>::try_downcast_column(&c.values).unwrap() + } + _ => unreachable!(), + }; + + let mut builder: ArrayColumnBuilder, GenericType<1>>> = + ArrayType::create_builder( + args.len() - 1, + &source_data_type.as_map().unwrap().as_tuple().unwrap(), + ); + for key_arg in args[1..].iter() { + if let Some((k, v)) = source_map + .iter() + .find(|(k, _)| k == key_arg.as_scalar().unwrap()) + { + builder.put_item((k.clone(), v.clone())); + } + } + builder.commit_row(); + + match len { + Some(_) => Value::Column(Column::Map(Box::new(builder.build().upcast()))), + _ => { + let scalar_builder = builder.build_scalar(); + Value::Scalar(Scalar::Map(Column::Tuple(vec![ + scalar_builder.keys, + scalar_builder.values, + ]))) + } + } + } + registry.register_2_arg_core::( "map_pick", |_, _, _| FunctionDomain::Full, diff --git a/src/query/functions/tests/it/scalars/map.rs b/src/query/functions/tests/it/scalars/map.rs index df91c05b5aa41..20408c9d3450f 100644 --- a/src/query/functions/tests/it/scalars/map.rs +++ b/src/query/functions/tests/it/scalars/map.rs @@ -281,10 +281,7 @@ fn test_map_size(file: &mut impl Write) { } fn test_map_pick(file: &mut impl Write) { - run_ast(file, "map_pick({'a':1,'b':2,'c':3}, [])", &[]); - run_ast(file, "map_pick({'a':1,'b':2,'c':3}, ['a', 'b'])", &[]); - run_ast(file, "map_pick({}, [])", &[]); - run_ast(file, "map_pick({}, ['d'])", &[]); + run_ast(file, "map_pick({'a':1,'b':2,'c':3}, 'a', 'b')", &[]); let columns = [ ("a_col", StringType::from_data(vec!["a", "b", "c"])), @@ -305,7 +302,7 @@ fn test_map_pick(file: &mut impl Write) { ]; run_ast( file, - "map_pick(map([a_col, b_col, c_col], [d_col, e_col, f_col]), ['a', 'b'])", + "map_pick(map([a_col, b_col, c_col], [d_col, e_col, f_col]), 'a', 'b')", &columns, ); } diff --git a/src/query/functions/tests/it/scalars/testdata/map.txt b/src/query/functions/tests/it/scalars/testdata/map.txt index 65dc4e9418294..3c782159a0c50 100644 --- a/src/query/functions/tests/it/scalars/testdata/map.txt +++ b/src/query/functions/tests/it/scalars/testdata/map.txt @@ -617,67 +617,37 @@ evaluation (internal): +--------+-----------------------------------------------------------------------------------------------------------------+ -ast : map_pick({'a':1,'b':2,'c':3}, []) -raw expr : map_pick(map(array('a', 'b', 'c'), array(1, 2, 3)), array()) -checked expr : map_pick(map(array("a", "b", "c"), array(1_u8, 2_u8, 3_u8)), CAST(array<>() AS Array(String))) -optimized expr : {} -output type : Map(String, UInt8) -output domain : {} -output : {} - - -ast : map_pick({'a':1,'b':2,'c':3}, ['a', 'b']) -raw expr : map_pick(map(array('a', 'b', 'c'), array(1, 2, 3)), array('a', 'b')) -checked expr : map_pick(map(array("a", "b", "c"), array(1_u8, 2_u8, 3_u8)), array("a", "b")) +ast : map_pick({'a':1,'b':2,'c':3}, 'a', 'b') +raw expr : map_pick(map(array('a', 'b', 'c'), array(1, 2, 3)), 'a', 'b') +checked expr : map_pick(map(array("a", "b", "c"), array(1_u8, 2_u8, 3_u8)), "a", "b") optimized expr : {"a":1_u8, "b":2_u8} output type : Map(String, UInt8) output domain : {[{"a"..="b"}], [{1..=2}]} output : {'a':1, 'b':2} -ast : map_pick({}, []) -raw expr : map_pick(map(array(), array()), array()) -checked expr : map_pick(map(array<>(), array<>()), array<>()) -optimized expr : {} :: Map(Nothing) -output type : Map(Nothing) -output domain : {} -output : {} - - -ast : map_pick({}, ['d']) -raw expr : map_pick(map(array(), array()), array('d')) -checked expr : map_pick(map(array<>(), array<>()), array("d")) -optimized expr : {} :: Map(Nothing) -output type : Map(Nothing) -output domain : {} -output : {} - - -ast : map_pick(map([a_col, b_col, c_col], [d_col, e_col, f_col]), ['a', 'b']) -raw expr : map_pick(map(array(a_col::String, b_col::String, c_col::String), array(d_col::String NULL, e_col::String NULL, f_col::String NULL)), array('a', 'b')) -checked expr : map_pick(map(array(a_col, b_col, c_col), array(d_col, e_col, f_col)), array("a", "b")) -optimized expr : map_pick(map(array(a_col, b_col, c_col), array(d_col, e_col, f_col)), ['a', 'b']) +ast : map_pick(map([a_col, b_col, c_col], [d_col, e_col, f_col]), 'a', 'b') +raw expr : map_pick(map(array(a_col::String, b_col::String, c_col::String), array(d_col::String NULL, e_col::String NULL, f_col::String NULL)), 'a', 'b') +checked expr : map_pick(map(array(a_col, b_col, c_col), array(d_col, e_col, f_col)), "a", "b") evaluation: +--------+-------------+-------------+-------------+---------------+----------------------+----------------------+--------------------------+ | | a_col | b_col | c_col | d_col | e_col | f_col | Output | +--------+-------------+-------------+-------------+---------------+----------------------+----------------------+--------------------------+ | Type | String | String | String | String NULL | String NULL | String NULL | Map(String, String NULL) | | Domain | {"a"..="c"} | {"d"..="f"} | {"x"..="z"} | {"v1"..="v3"} | {""..="v5"} ∪ {NULL} | {""..="v7"} ∪ {NULL} | Unknown | -| Row 0 | 'a' | 'd' | 'x' | 'v1' | 'v4' | 'v6' | {'a':'v1'} | -| Row 1 | 'b' | 'e' | 'y' | 'v2' | 'v5' | NULL | {'b':'v2'} | -| Row 2 | 'c' | 'f' | 'z' | 'v3' | NULL | 'v7' | {} | +| Row 0 | 'a' | 'd' | 'x' | 'v1' | 'v4' | 'v6' | {'a':'v1', 'b':'v2'} | +--------+-------------+-------------+-------------+---------------+----------------------+----------------------+--------------------------+ evaluation (internal): -+--------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| a_col | StringColumn { data: 0x616263, offsets: [0, 1, 2, 3] } | -| b_col | StringColumn { data: 0x646566, offsets: [0, 1, 2, 3] } | -| c_col | StringColumn { data: 0x78797a, offsets: [0, 1, 2, 3] } | -| d_col | NullableColumn { column: StringColumn { data: 0x763176327633, offsets: [0, 2, 4, 6] }, validity: [0b_____111] } | -| e_col | NullableColumn { column: StringColumn { data: 0x76347635, offsets: [0, 2, 4, 4] }, validity: [0b_____011] } | -| f_col | NullableColumn { column: StringColumn { data: 0x76367637, offsets: [0, 2, 2, 4] }, validity: [0b_____101] } | -| Output | ArrayColumn { values: Tuple([StringColumn { data: 0x6162, offsets: [0, 1, 2] }, NullableColumn { column: StringColumn { data: 0x76317632, offsets: [0, 2, 4] }, validity: [0b______11] }]), offsets: [0, 1, 2, 2] } | -+--------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++--------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| a_col | StringColumn { data: 0x616263, offsets: [0, 1, 2, 3] } | +| b_col | StringColumn { data: 0x646566, offsets: [0, 1, 2, 3] } | +| c_col | StringColumn { data: 0x78797a, offsets: [0, 1, 2, 3] } | +| d_col | NullableColumn { column: StringColumn { data: 0x763176327633, offsets: [0, 2, 4, 6] }, validity: [0b_____111] } | +| e_col | NullableColumn { column: StringColumn { data: 0x76347635, offsets: [0, 2, 4, 4] }, validity: [0b_____011] } | +| f_col | NullableColumn { column: StringColumn { data: 0x76367637, offsets: [0, 2, 2, 4] }, validity: [0b_____101] } | +| Output | ArrayColumn { values: Tuple([StringColumn { data: 0x6162, offsets: [0, 1, 2] }, NullableColumn { column: StringColumn { data: 0x76317632, offsets: [0, 2, 4] }, validity: [0b______11] }]), offsets: [0, 2] } | ++--------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ diff --git a/tests/sqllogictests/suites/query/functions/02_0074_function_map.test b/tests/sqllogictests/suites/query/functions/02_0074_function_map.test index 808b4d425bb91..1e6b4d6abad66 100644 --- a/tests/sqllogictests/suites/query/functions/02_0074_function_map.test +++ b/tests/sqllogictests/suites/query/functions/02_0074_function_map.test @@ -169,17 +169,12 @@ SELECT map_contains_key({'k1': 'v1', 'k2': NULL}, 'k2') # Test map_pick function query -SELECT map_pick({'k1': 'v1', 'k2': 'v2', 'k3': null}, ['k1', 'k3']) +SELECT map_pick({'k1': 'v1', 'k2': 'v2', 'k3': null}, 'k1', 'k3') ---- {'k1':'v1','k3':NULL} query -SELECT map_pick({}, ['k1', 'k2', 'k3']) ----- -{} - -query -SELECT map_pick({'k1': 'v1', 'k2': 'v2', 'k3': null}, []) +SELECT map_pick({'k1': 'v1', 'k2': 'v2', 'k3': null}, '') ---- {} @@ -187,10 +182,10 @@ statement ok CREATE TABLE map_pick_test(col_str Map(String, String Null) Not Null, col_int Map(String, Int Null) Null) statement ok -INSERT INTO map_pick_test VALUES ({'k1':'v1','k2':'v2','k3':null},{'a':10,'b':20}), ({'k5':'v5','k6':'v6'}, {'d':40,'e':null,'f':50}), ({}, null) +INSERT INTO map_pick_test VALUES ({'k1':'v1','k2':'v2','k3':null},{'a':10,'b':20}), ({'k5':'v5','k6':'v6'}, {'d':40,'e':null,'f':50}) query -SELECT map_pick(col_str, ['k1', 'k3']), map_pick(col_str, []), map_pick(col_int, ['a', 'b', 'c']) +SELECT map_pick(col_str, 'k1', 'k3') FROM map_pick_test ---- {'k1':'v1','k3':NULL} {} {'a':10,'b':20}