Skip to content

Commit

Permalink
pass group id to join and fix filter-related tests
Browse files Browse the repository at this point in the history
  • Loading branch information
xx01cyx committed Nov 19, 2024
1 parent 489ff48 commit be71afb
Show file tree
Hide file tree
Showing 5 changed files with 124 additions and 63 deletions.
18 changes: 18 additions & 0 deletions optd-cost-model/src/common/properties/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,3 +21,21 @@ impl std::fmt::Display for Attribute {
}
}
}

impl Attribute {
pub fn new(name: String, typ: ConstantType, nullable: bool) -> Self {
Self {
name,
typ,
nullable,
}
}

pub fn new_non_null_int64(name: String) -> Self {
Self {
name,
typ: ConstantType::Int64,
nullable: false,
}
}
}
89 changes: 82 additions & 7 deletions optd-cost-model/src/cost/join/core.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ impl<S: CostModelStorageManager> CostModelImpl<S> {
pub(crate) async fn get_join_selectivity_from_expr_tree(
&self,
join_typ: JoinType,
group_id: GroupId,
expr_tree: ArcPredicateNode,
attr_refs: &AttrRefs,
input_correlation: Option<SemanticCorrelation>,
Expand Down Expand Up @@ -61,6 +62,7 @@ impl<S: CostModelStorageManager> CostModelImpl<S> {
};
self.get_join_selectivity_core(
join_typ,
group_id,
on_attr_ref_pairs,
filter_expr_tree,
attr_refs,
Expand All @@ -75,6 +77,7 @@ impl<S: CostModelStorageManager> CostModelImpl<S> {
if let Some(on_attr_ref_pair) = get_on_attr_ref_pair(expr_tree.clone(), attr_refs) {
self.get_join_selectivity_core(
join_typ,
group_id,
vec![on_attr_ref_pair],
None,
attr_refs,
Expand All @@ -87,6 +90,7 @@ impl<S: CostModelStorageManager> CostModelImpl<S> {
} else {
self.get_join_selectivity_core(
join_typ,
group_id,
vec![],
Some(expr_tree),
attr_refs,
Expand All @@ -105,6 +109,7 @@ impl<S: CostModelStorageManager> CostModelImpl<S> {
pub(crate) async fn get_join_selectivity_from_keys(
&self,
join_typ: JoinType,
group_id: GroupId,
left_keys: ListPred,
right_keys: ListPred,
attr_refs: &AttrRefs,
Expand All @@ -129,6 +134,7 @@ impl<S: CostModelStorageManager> CostModelImpl<S> {
.collect_vec();
self.get_join_selectivity_core(
join_typ,
group_id,
on_attr_ref_pairs,
None,
attr_refs,
Expand Down Expand Up @@ -156,6 +162,7 @@ impl<S: CostModelStorageManager> CostModelImpl<S> {
async fn get_join_selectivity_core(
&self,
join_typ: JoinType,
group_id: GroupId,
on_attr_ref_pairs: Vec<(AttrIndexPred, AttrIndexPred)>,
filter_expr_tree: Option<ArcPredicateNode>,
attr_refs: &AttrRefs,
Expand All @@ -180,8 +187,6 @@ impl<S: CostModelStorageManager> CostModelImpl<S> {
// get_filter_selectivity() function, but this may change in the future.
let join_filter_selectivity = match filter_expr_tree {
Some(filter_expr_tree) => {
// FIXME(group_id): Pass in group id or schema & attr_refs
let group_id = GroupId(0);
self.get_filter_selectivity(group_id, filter_expr_tree)
.await?
}
Expand Down Expand Up @@ -405,20 +410,28 @@ mod tests {
use std::collections::HashMap;

use crate::{
common::{predicates::attr_index_pred, types::TableId, values::Value},
common::{
predicates::{attr_index_pred, constant_pred::ConstantType},
properties::Attribute,
types::TableId,
values::Value,
},
cost_model::tests::{
attr_index, bin_op, cnst, create_four_table_mock_cost_model, create_mock_cost_model,
create_three_table_mock_cost_model, create_two_table_mock_cost_model,
create_two_table_mock_cost_model_custom_row_cnts, empty_per_attr_stats, log_op,
per_attr_stats_with_dist_and_ndistinct, per_attr_stats_with_ndistinct,
TestOptCostModelMock, TestPerAttributeStats, TEST_TABLE1_ID, TEST_TABLE2_ID,
TEST_TABLE3_ID, TEST_TABLE4_ID,
TestOptCostModelMock, TestPerAttributeStats, TEST_ATTR1_NAME, TEST_ATTR2_NAME,
TEST_TABLE1_ID, TEST_TABLE2_ID, TEST_TABLE3_ID, TEST_TABLE4_ID,
},
memo_ext::tests::MemoGroupInfo,
stats::DEFAULT_EQ_SEL,
};

use super::*;

/// Group id used for the join node in these tests: it is passed as the
/// `group_id` argument to the join selectivity functions and is the key under
/// which tests that supply a join memo register their `MemoGroupInfo`.
const JOIN_GROUP_ID: GroupId = GroupId(10);

/// A wrapper around get_join_selectivity_from_expr_tree that extracts the
/// table row counts from the cost model.
async fn test_get_join_selectivity(
Expand All @@ -436,6 +449,7 @@ mod tests {
cost_model
.get_join_selectivity_from_expr_tree(
join_typ,
JOIN_GROUP_ID,
expr_tree,
attr_refs,
input_correlation,
Expand All @@ -448,6 +462,7 @@ mod tests {
cost_model
.get_join_selectivity_from_expr_tree(
join_typ,
JOIN_GROUP_ID,
expr_tree,
attr_refs,
input_correlation,
Expand All @@ -470,6 +485,7 @@ mod tests {
cost_model
.get_join_selectivity_from_expr_tree(
JoinType::Inner,
JOIN_GROUP_ID,
cnst(Value::Bool(true)),
&vec![],
None,
Expand All @@ -484,6 +500,7 @@ mod tests {
cost_model
.get_join_selectivity_from_expr_tree(
JoinType::Inner,
JOIN_GROUP_ID,
cnst(Value::Bool(false)),
&vec![],
None,
Expand All @@ -501,6 +518,7 @@ mod tests {
let cost_model = create_two_table_mock_cost_model(
per_attr_stats_with_ndistinct(5),
per_attr_stats_with_ndistinct(4),
None,
);

let attr_refs = vec![
Expand Down Expand Up @@ -540,6 +558,7 @@ mod tests {
let cost_model = create_two_table_mock_cost_model(
per_attr_stats_with_ndistinct(5),
per_attr_stats_with_ndistinct(4),
None,
);

let attr_refs = vec![
Expand Down Expand Up @@ -578,11 +597,28 @@ mod tests {
}

#[tokio::test]
#[ignore = "index out of bounds: the len is 1 but the index is 1"]
async fn test_inner_and_of_oncond_and_filter() {
let join_memo = HashMap::from([(
JOIN_GROUP_ID,
MemoGroupInfo::new(
vec![
Attribute::new_non_null_int64(TEST_ATTR1_NAME.to_string()),
Attribute::new_non_null_int64(TEST_ATTR2_NAME.to_string()),
]
.into(),
GroupAttrRefs::new(
vec![
AttrRef::new_base_table_attr_ref(TEST_TABLE1_ID, 0),
AttrRef::new_base_table_attr_ref(TEST_TABLE2_ID, 0),
],
None,
),
),
)]);
let cost_model = create_two_table_mock_cost_model(
per_attr_stats_with_ndistinct(5),
per_attr_stats_with_ndistinct(4),
Some(join_memo),
);

let attr_refs = vec![
Expand Down Expand Up @@ -621,11 +657,28 @@ mod tests {
}

#[tokio::test]
#[ignore = "filter todo"]
async fn test_inner_and_of_filters() {
let join_memo = HashMap::from([(
JOIN_GROUP_ID,
MemoGroupInfo::new(
vec![
Attribute::new_non_null_int64(TEST_ATTR1_NAME.to_string()),
Attribute::new_non_null_int64(TEST_ATTR2_NAME.to_string()),
]
.into(),
GroupAttrRefs::new(
vec![
AttrRef::new_base_table_attr_ref(TEST_TABLE1_ID, 0),
AttrRef::new_base_table_attr_ref(TEST_TABLE2_ID, 0),
],
None,
),
),
)]);
let cost_model = create_two_table_mock_cost_model(
per_attr_stats_with_ndistinct(5),
per_attr_stats_with_ndistinct(4),
Some(join_memo),
);

let attr_refs = vec![
Expand Down Expand Up @@ -668,6 +721,7 @@ mod tests {
let cost_model = create_two_table_mock_cost_model(
per_attr_stats_with_ndistinct(5),
per_attr_stats_with_ndistinct(4),
None,
);

let attr_refs = vec![
Expand Down Expand Up @@ -812,6 +866,7 @@ mod tests {
per_attr_stats_with_ndistinct(4),
5,
4,
None,
);

let attr_refs = vec![
Expand Down Expand Up @@ -863,6 +918,7 @@ mod tests {
per_attr_stats_with_ndistinct(4),
10,
8,
None,
);

let attr_refs = vec![
Expand Down Expand Up @@ -916,6 +972,7 @@ mod tests {
per_attr_stats_with_ndistinct(2),
20,
4,
None,
);

let attr_refs = vec![
Expand Down Expand Up @@ -964,11 +1021,29 @@ mod tests {
/// the inner will be < 1 / row count of both tables
#[tokio::test]
async fn test_outer_unique_oncond_filter() {
let join_memo = HashMap::from([(
JOIN_GROUP_ID,
MemoGroupInfo::new(
vec![
Attribute::new_non_null_int64(TEST_ATTR1_NAME.to_string()),
Attribute::new_non_null_int64(TEST_ATTR2_NAME.to_string()),
]
.into(),
GroupAttrRefs::new(
vec![
AttrRef::new_base_table_attr_ref(TEST_TABLE1_ID, 0),
AttrRef::new_base_table_attr_ref(TEST_TABLE2_ID, 0),
],
None,
),
),
)]);
let cost_model = create_two_table_mock_cost_model_custom_row_cnts(
per_attr_stats_with_dist_and_ndistinct(vec![(Value::Int32(128), 0.4)], 50),
per_attr_stats_with_ndistinct(4),
50,
4,
Some(join_memo),
);

let attr_refs = vec![
Expand Down
1 change: 1 addition & 0 deletions optd-cost-model/src/cost/join/hash_join.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ impl<S: CostModelStorageManager> CostModelImpl<S> {
let input_correlation = get_input_correlation(left_attr_refs, right_attr_refs);
self.get_join_selectivity_from_keys(
join_typ,
group_id,
left_keys,
right_keys,
output_attr_refs.attr_refs(),
Expand Down
1 change: 1 addition & 0 deletions optd-cost-model/src/cost/join/nested_loop_join.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ impl<S: CostModelStorageManager> CostModelImpl<S> {

self.get_join_selectivity_from_expr_tree(
join_typ,
group_id,
join_cond,
output_attr_refs.attr_refs(),
input_correlation,
Expand Down
Loading

0 comments on commit be71afb

Please sign in to comment.