Skip to content

Commit

Permalink
feat(functions): add factory function for map_pick
Browse files Browse the repository at this point in the history
  • Loading branch information
hanxuanliang committed May 23, 2024
1 parent d22c52f commit b3bff22
Show file tree
Hide file tree
Showing 4 changed files with 104 additions and 62 deletions.
80 changes: 80 additions & 0 deletions src/query/functions/src/scalars/map.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,16 @@

use std::collections::HashSet;
use std::hash::Hash;
use std::sync::Arc;

use databend_common_expression::types::array::ArrayColumnBuilder;
use databend_common_expression::types::map::KvPair;
use databend_common_expression::types::nullable::NullableDomain;
use databend_common_expression::types::AnyType;
use databend_common_expression::types::ArgType;
use databend_common_expression::types::ArrayType;
use databend_common_expression::types::BooleanType;
use databend_common_expression::types::DataType;
use databend_common_expression::types::EmptyArrayType;
use databend_common_expression::types::EmptyMapType;
use databend_common_expression::types::GenericType;
Expand All @@ -27,11 +32,20 @@ use databend_common_expression::types::NullType;
use databend_common_expression::types::NullableType;
use databend_common_expression::types::NumberType;
use databend_common_expression::types::SimpleDomain;
use databend_common_expression::types::ValueType;
use databend_common_expression::vectorize_1_arg;
use databend_common_expression::vectorize_with_builder_2_arg;
use databend_common_expression::Column;
use databend_common_expression::EvalContext;
use databend_common_expression::Function;
use databend_common_expression::FunctionDomain;
use databend_common_expression::FunctionEval;
use databend_common_expression::FunctionRegistry;
use databend_common_expression::FunctionSignature;
use databend_common_expression::Scalar;
use databend_common_expression::ScalarRef;
use databend_common_expression::Value;
use databend_common_expression::ValueRef;
use databend_common_hashtable::StackHashSet;
use siphasher::sip128::Hasher128;
use siphasher::sip128::SipHasher24;
Expand Down Expand Up @@ -245,6 +259,72 @@ pub fn register(registry: &mut FunctionRegistry) {
},
);

// Variadic `map_pick(map, key1, key2, ...)`.
//
// The factory only produces a function when there is at least one key argument and
// the first argument is a concrete `Map` type whose inner type is a key/value tuple.
// `map_pick_fn_vec` unwraps exactly that shape, so any other first argument
// (empty map, nullable map, non-map) must be rejected here instead of panicking
// during evaluation. The result has the same type as the input map.
registry.register_function_factory("map_pick", |_, args_type: &[DataType]| {
    if args_type.len() < 2 {
        return None;
    }

    let is_kv_map = args_type[0]
        .as_map()
        .and_then(|inner| inner.as_tuple())
        .is_some();
    if !is_kv_map {
        return None;
    }

    Some(Arc::new(Function {
        signature: FunctionSignature {
            name: "map_pick".to_string(),
            args_type: args_type.to_vec(),
            return_type: args_type[0].clone(),
        },
        eval: FunctionEval::Scalar {
            calc_domain: Box::new(move |_, _| FunctionDomain::Full),
            eval: Box::new(map_pick_fn_vec),
        },
    }))
});

/// Evaluates `map_pick(map, key1, key2, ...)`: for every input row, builds a map that
/// contains only the entries of `map` whose key equals one of the key arguments.
///
/// `args[0]` is the source map and `args[1..]` are the keys to keep. Each argument may
/// be a scalar or a column; scalars are reused for every row. Picked entries are emitted
/// in key-argument order, and keys that are absent from the row's map are skipped.
///
/// Returns a `Value::Column` with one map per row when any argument is a column,
/// otherwise a single `Value::Scalar` map.
///
/// # Panics
///
/// Panics if `args` is empty or if `args[0]` is not a (non-nullable) map value.
fn map_pick_fn_vec(args: &[ValueRef<AnyType>], _: &mut EvalContext) -> Value<AnyType> {
    // `Some(n)` when at least one argument is a column of `n` rows, `None` when every
    // argument is a scalar (a single logical row).
    let len = args.iter().find_map(|arg| match arg {
        ValueRef::Column(col) => Some(col.len()),
        _ => None,
    });
    let num_rows = len.unwrap_or(1);

    let source_data_type = match args.first().unwrap() {
        ValueRef::Scalar(s) => s.infer_data_type(),
        ValueRef::Column(c) => c.data_type(),
    };

    let mut builder: ArrayColumnBuilder<KvPair<GenericType<0>, GenericType<1>>> =
        ArrayType::create_builder(
            num_rows,
            &source_data_type.as_map().unwrap().as_tuple().unwrap(),
        );

    for row in 0..num_rows {
        // Look at this row's entries only; scanning the flattened values of a map
        // column would mix entries that belong to different rows.
        let row_entries = match args[0].index(row) {
            Some(ScalarRef::Map(entries)) => entries,
            _ => unreachable!(),
        };
        let row_map =
            KvPair::<GenericType<0>, GenericType<1>>::try_downcast_column(&row_entries).unwrap();

        for key_arg in args[1..].iter() {
            // `index` handles both scalar keys (same value for every row) and key columns.
            if let Some(wanted) = key_arg.index(row) {
                if let Some((k, v)) = row_map.iter().find(|(k, _)| *k == wanted) {
                    builder.put_item((k.clone(), v.clone()));
                }
            }
        }
        // Exactly one output map per input row, even when nothing was picked.
        builder.commit_row();
    }

    match len {
        Some(_) => Value::Column(Column::Map(Box::new(builder.build().upcast()))),
        None => {
            let scalar_builder = builder.build_scalar();
            Value::Scalar(Scalar::Map(Column::Tuple(vec![
                scalar_builder.keys,
                scalar_builder.values,
            ])))
        }
    }
}

registry.register_2_arg_core::<EmptyMapType, EmptyArrayType, EmptyMapType, _, _>(
"map_pick",
|_, _, _| FunctionDomain::Full,
Expand Down
7 changes: 2 additions & 5 deletions src/query/functions/tests/it/scalars/map.rs
Original file line number Diff line number Diff line change
Expand Up @@ -281,10 +281,7 @@ fn test_map_size(file: &mut impl Write) {
}

fn test_map_pick(file: &mut impl Write) {
run_ast(file, "map_pick({'a':1,'b':2,'c':3}, [])", &[]);
run_ast(file, "map_pick({'a':1,'b':2,'c':3}, ['a', 'b'])", &[]);
run_ast(file, "map_pick({}, [])", &[]);
run_ast(file, "map_pick({}, ['d'])", &[]);
run_ast(file, "map_pick({'a':1,'b':2,'c':3}, 'a', 'b')", &[]);

let columns = [
("a_col", StringType::from_data(vec!["a", "b", "c"])),
Expand All @@ -305,7 +302,7 @@ fn test_map_pick(file: &mut impl Write) {
];
run_ast(
file,
"map_pick(map([a_col, b_col, c_col], [d_col, e_col, f_col]), ['a', 'b'])",
"map_pick(map([a_col, b_col, c_col], [d_col, e_col, f_col]), 'a', 'b')",
&columns,
);
}
66 changes: 18 additions & 48 deletions src/query/functions/tests/it/scalars/testdata/map.txt
Original file line number Diff line number Diff line change
Expand Up @@ -617,67 +617,37 @@ evaluation (internal):
+--------+-----------------------------------------------------------------------------------------------------------------+


ast : map_pick({'a':1,'b':2,'c':3}, [])
raw expr : map_pick(map(array('a', 'b', 'c'), array(1, 2, 3)), array())
checked expr : map_pick<T0=String, T1=UInt8><Map(T0, T1), Array(T0)>(map<T0=String, T1=UInt8><Array(T0), Array(T1)>(array<T0=String><T0, T0, T0>("a", "b", "c"), array<T0=UInt8><T0, T0, T0>(1_u8, 2_u8, 3_u8)), CAST(array<>() AS Array(String)))
optimized expr : {}
output type : Map(String, UInt8)
output domain : {}
output : {}


ast : map_pick({'a':1,'b':2,'c':3}, ['a', 'b'])
raw expr : map_pick(map(array('a', 'b', 'c'), array(1, 2, 3)), array('a', 'b'))
checked expr : map_pick<T0=String, T1=UInt8><Map(T0, T1), Array(T0)>(map<T0=String, T1=UInt8><Array(T0), Array(T1)>(array<T0=String><T0, T0, T0>("a", "b", "c"), array<T0=UInt8><T0, T0, T0>(1_u8, 2_u8, 3_u8)), array<T0=String><T0, T0>("a", "b"))
ast : map_pick({'a':1,'b':2,'c':3}, 'a', 'b')
raw expr : map_pick(map(array('a', 'b', 'c'), array(1, 2, 3)), 'a', 'b')
checked expr : map_pick<Map(String, UInt8), String, String>(map<T0=String, T1=UInt8><Array(T0), Array(T1)>(array<T0=String><T0, T0, T0>("a", "b", "c"), array<T0=UInt8><T0, T0, T0>(1_u8, 2_u8, 3_u8)), "a", "b")
optimized expr : {"a":1_u8, "b":2_u8}
output type : Map(String, UInt8)
output domain : {[{"a"..="b"}], [{1..=2}]}
output : {'a':1, 'b':2}


ast : map_pick({}, [])
raw expr : map_pick(map(array(), array()), array())
checked expr : map_pick<Map(Nothing), Array(Nothing)>(map<Array(Nothing), Array(Nothing)>(array<>(), array<>()), array<>())
optimized expr : {} :: Map(Nothing)
output type : Map(Nothing)
output domain : {}
output : {}


ast : map_pick({}, ['d'])
raw expr : map_pick(map(array(), array()), array('d'))
checked expr : map_pick<T0=String><Map(Nothing), Array(T0)>(map<Array(Nothing), Array(Nothing)>(array<>(), array<>()), array<T0=String><T0>("d"))
optimized expr : {} :: Map(Nothing)
output type : Map(Nothing)
output domain : {}
output : {}


ast : map_pick(map([a_col, b_col, c_col], [d_col, e_col, f_col]), ['a', 'b'])
raw expr : map_pick(map(array(a_col::String, b_col::String, c_col::String), array(d_col::String NULL, e_col::String NULL, f_col::String NULL)), array('a', 'b'))
checked expr : map_pick<T0=String, T1=String NULL><Map(T0, T1), Array(T0)>(map<T0=String, T1=String NULL><Array(T0), Array(T1)>(array<T0=String><T0, T0, T0>(a_col, b_col, c_col), array<T0=String NULL><T0, T0, T0>(d_col, e_col, f_col)), array<T0=String><T0, T0>("a", "b"))
optimized expr : map_pick<T0=String, T1=String NULL><Map(T0, T1), Array(T0)>(map<T0=String, T1=String NULL><Array(T0), Array(T1)>(array<T0=String><T0, T0, T0>(a_col, b_col, c_col), array<T0=String NULL><T0, T0, T0>(d_col, e_col, f_col)), ['a', 'b'])
ast : map_pick(map([a_col, b_col, c_col], [d_col, e_col, f_col]), 'a', 'b')
raw expr : map_pick(map(array(a_col::String, b_col::String, c_col::String), array(d_col::String NULL, e_col::String NULL, f_col::String NULL)), 'a', 'b')
checked expr : map_pick<Map(String, String NULL), String, String>(map<T0=String, T1=String NULL><Array(T0), Array(T1)>(array<T0=String><T0, T0, T0>(a_col, b_col, c_col), array<T0=String NULL><T0, T0, T0>(d_col, e_col, f_col)), "a", "b")
evaluation:
+--------+-------------+-------------+-------------+---------------+----------------------+----------------------+--------------------------+
| | a_col | b_col | c_col | d_col | e_col | f_col | Output |
+--------+-------------+-------------+-------------+---------------+----------------------+----------------------+--------------------------+
| Type | String | String | String | String NULL | String NULL | String NULL | Map(String, String NULL) |
| Domain | {"a"..="c"} | {"d"..="f"} | {"x"..="z"} | {"v1"..="v3"} | {""..="v5"} ∪ {NULL} | {""..="v7"} ∪ {NULL} | Unknown |
| Row 0 | 'a' | 'd' | 'x' | 'v1' | 'v4' | 'v6' | {'a':'v1'} |
| Row 1 | 'b' | 'e' | 'y' | 'v2' | 'v5' | NULL | {'b':'v2'} |
| Row 2 | 'c' | 'f' | 'z' | 'v3' | NULL | 'v7' | {} |
| Row 0 | 'a' | 'd' | 'x' | 'v1' | 'v4' | 'v6' | {'a':'v1', 'b':'v2'} |
+--------+-------------+-------------+-------------+---------------+----------------------+----------------------+--------------------------+
evaluation (internal):
+--------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Column | Data |
+--------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| a_col | StringColumn { data: 0x616263, offsets: [0, 1, 2, 3] } |
| b_col | StringColumn { data: 0x646566, offsets: [0, 1, 2, 3] } |
| c_col | StringColumn { data: 0x78797a, offsets: [0, 1, 2, 3] } |
| d_col | NullableColumn { column: StringColumn { data: 0x763176327633, offsets: [0, 2, 4, 6] }, validity: [0b_____111] } |
| e_col | NullableColumn { column: StringColumn { data: 0x76347635, offsets: [0, 2, 4, 4] }, validity: [0b_____011] } |
| f_col | NullableColumn { column: StringColumn { data: 0x76367637, offsets: [0, 2, 2, 4] }, validity: [0b_____101] } |
| Output | ArrayColumn { values: Tuple([StringColumn { data: 0x6162, offsets: [0, 1, 2] }, NullableColumn { column: StringColumn { data: 0x76317632, offsets: [0, 2, 4] }, validity: [0b______11] }]), offsets: [0, 1, 2, 2] } |
+--------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+--------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Column | Data |
+--------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| a_col | StringColumn { data: 0x616263, offsets: [0, 1, 2, 3] } |
| b_col | StringColumn { data: 0x646566, offsets: [0, 1, 2, 3] } |
| c_col | StringColumn { data: 0x78797a, offsets: [0, 1, 2, 3] } |
| d_col | NullableColumn { column: StringColumn { data: 0x763176327633, offsets: [0, 2, 4, 6] }, validity: [0b_____111] } |
| e_col | NullableColumn { column: StringColumn { data: 0x76347635, offsets: [0, 2, 4, 4] }, validity: [0b_____011] } |
| f_col | NullableColumn { column: StringColumn { data: 0x76367637, offsets: [0, 2, 2, 4] }, validity: [0b_____101] } |
| Output | ArrayColumn { values: Tuple([StringColumn { data: 0x6162, offsets: [0, 1, 2] }, NullableColumn { column: StringColumn { data: 0x76317632, offsets: [0, 2, 4] }, validity: [0b______11] }]), offsets: [0, 2] } |
+--------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+


Original file line number Diff line number Diff line change
Expand Up @@ -169,28 +169,23 @@ SELECT map_contains_key({'k1': 'v1', 'k2': NULL}, 'k2')

# Test map_pick function
query
SELECT map_pick({'k1': 'v1', 'k2': 'v2', 'k3': null}, ['k1', 'k3'])
SELECT map_pick({'k1': 'v1', 'k2': 'v2', 'k3': null}, 'k1', 'k3')
----
{'k1':'v1','k3':NULL}

query
SELECT map_pick({}, ['k1', 'k2', 'k3'])
----
{}

query
SELECT map_pick({'k1': 'v1', 'k2': 'v2', 'k3': null}, [])
SELECT map_pick({'k1': 'v1', 'k2': 'v2', 'k3': null}, '')
----
{}

statement ok
CREATE TABLE map_pick_test(col_str Map(String, String Null) Not Null, col_int Map(String, Int Null) Null)

statement ok
INSERT INTO map_pick_test VALUES ({'k1':'v1','k2':'v2','k3':null},{'a':10,'b':20}), ({'k5':'v5','k6':'v6'}, {'d':40,'e':null,'f':50}), ({}, null)
INSERT INTO map_pick_test VALUES ({'k1':'v1','k2':'v2','k3':null},{'a':10,'b':20}), ({'k5':'v5','k6':'v6'}, {'d':40,'e':null,'f':50})

query
SELECT map_pick(col_str, ['k1', 'k3']), map_pick(col_str, []), map_pick(col_int, ['a', 'b', 'c'])
SELECT map_pick(col_str, 'k1', 'k3')
FROM map_pick_test
----
{'k1':'v1','k3':NULL} {} {'a':10,'b':20}
Expand Down

0 comments on commit b3bff22

Please sign in to comment.