Skip to content

Commit

Permalink
Resolve conflict with main
Browse files Browse the repository at this point in the history
  • Loading branch information
lanlou1554 committed Nov 16, 2024
1 parent 5c5a40f commit 814c3d6
Show file tree
Hide file tree
Showing 14 changed files with 249 additions and 144 deletions.
22 changes: 22 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

20 changes: 11 additions & 9 deletions optd-cost-model/src/cost/agg.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,11 @@ use crate::{
},
cost_model::CostModelImpl,
stats::DEFAULT_NUM_DISTINCT,
CostModelError, CostModelResult, EstimatedStatistic,
CostModelError, CostModelResult, EstimatedStatistic, SemanticError,
};

impl<S: CostModelStorageLayer> CostModelImpl<S> {
pub fn get_agg_row_cnt(
pub async fn get_agg_row_cnt(
&self,
group_by: ArcPredicateNode,
) -> CostModelResult<EstimatedStatistic> {
Expand All @@ -22,22 +22,24 @@ impl<S: CostModelStorageLayer> CostModelImpl<S> {
} else {
// Multiply the n-distinct of all the group by columns.
// TODO: improve with multi-dimensional n-distinct
let row_cnt = group_by.0.children.iter().try_fold(1, |acc, node| {
let mut row_cnt = 1;

for node in &group_by.0.children {
match node.typ {
PredicateType::AttributeRef => {
let attr_ref =
AttributeRefPred::from_pred_node(node.clone()).ok_or_else(|| {
CostModelError::InvalidPredicate(
SemanticError::InvalidPredicate(
"Expected AttributeRef predicate".to_string(),
)
})?;
if attr_ref.is_derived() {
Ok(acc * DEFAULT_NUM_DISTINCT)
row_cnt *= DEFAULT_NUM_DISTINCT;
} else {
let table_id = attr_ref.table_id();
let attr_idx = attr_ref.attr_index();
let stats_option =
self.get_attribute_comb_stats(table_id, &[attr_idx])?;
self.get_attribute_comb_stats(table_id, &[attr_idx]).await?;

let ndistinct = match stats_option {
Some(stats) => stats.ndistinct,
Expand All @@ -46,15 +48,15 @@ impl<S: CostModelStorageLayer> CostModelImpl<S> {
DEFAULT_NUM_DISTINCT
}
};
Ok(acc * ndistinct)
row_cnt *= ndistinct;
}
}
_ => {
// TODO: Consider the case where `GROUP BY 1`.
panic!("GROUP BY must have attribute ref predicate")
panic!("GROUP BY must have attribute ref predicate");
}
}
})?;
}
Ok(EstimatedStatistic(row_cnt))
}
}
Expand Down
50 changes: 30 additions & 20 deletions optd-cost-model/src/cost/filter/attribute.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ impl<S: CostModelStorageLayer> CostModelImpl<S> {
/// Also, get_attribute_equality_selectivity is a subroutine when computing range
/// selectivity, which is another reason for separating these into two functions.
/// `is_eq` indicates whether the comparison is `==` (true) or `!=` (false).
pub(crate) fn get_attribute_equality_selectivity(
pub(crate) async fn get_attribute_equality_selectivity(
&self,
table_id: TableId,
attr_base_index: usize,
Expand All @@ -28,8 +28,9 @@ impl<S: CostModelStorageLayer> CostModelImpl<S> {
) -> CostModelResult<f64> {
// TODO: The attribute could be a derived attribute
let ret_sel = {
if let Some(attribute_stats) =
self.get_attribute_comb_stats(table_id, &[attr_base_index])?
if let Some(attribute_stats) = self
.get_attribute_comb_stats(table_id, &[attr_base_index])
.await?
{
let eq_freq =
if let Some(freq) = attribute_stats.mcvs.freq(&vec![Some(value.clone())]) {
Expand Down Expand Up @@ -91,7 +92,7 @@ impl<S: CostModelStorageLayer> CostModelImpl<S> {
}

/// Compute the frequency of values in an attribute less than the given value.
fn get_attribute_lt_value_freq(
async fn get_attribute_lt_value_freq(
&self,
attribute_stats: &AttributeCombValueStats,
table_id: TableId,
Expand All @@ -102,7 +103,9 @@ impl<S: CostModelStorageLayer> CostModelImpl<S> {
// into total_leq_cdf this logic just so happens to be the exact same logic as
// get_attribute_equality_selectivity implements
let ret_freq = Self::get_attribute_leq_value_freq(attribute_stats, value)
- self.get_attribute_equality_selectivity(table_id, attr_base_index, value, true)?;
- self
.get_attribute_equality_selectivity(table_id, attr_base_index, value, true)
.await?;
assert!(
(0.0..=1.0).contains(&ret_freq),
"ret_freq ({}) should be in [0, 1]",
Expand All @@ -116,25 +119,29 @@ impl<S: CostModelStorageLayer> CostModelImpl<S> {
/// Range predicates are handled entirely differently from equality predicates so this is its
/// own function. If it is unable to find the statistics, it returns DEFAULT_INEQ_SEL.
/// The selectivity is computed as quantile of the right bound minus quantile of the left bound.
pub(crate) fn get_attribute_range_selectivity(
pub(crate) async fn get_attribute_range_selectivity(
&self,
table_id: TableId,
attr_base_index: usize,
start: Bound<&Value>,
end: Bound<&Value>,
) -> CostModelResult<f64> {
// TODO: Consider the case where the attribute is a derived attribute.
if let Some(attribute_stats) =
self.get_attribute_comb_stats(table_id, &[attr_base_index])?
if let Some(attribute_stats) = self
.get_attribute_comb_stats(table_id, &[attr_base_index])
.await?
{
let left_quantile = match start {
Bound::Unbounded => 0.0,
Bound::Included(value) => self.get_attribute_lt_value_freq(
&attribute_stats,
table_id,
attr_base_index,
value,
)?,
Bound::Included(value) => {
self.get_attribute_lt_value_freq(
&attribute_stats,
table_id,
attr_base_index,
value,
)
.await?
}
Bound::Excluded(value) => {
Self::get_attribute_leq_value_freq(&attribute_stats, value)
}
Expand All @@ -144,12 +151,15 @@ impl<S: CostModelStorageLayer> CostModelImpl<S> {
Bound::Included(value) => {
Self::get_attribute_leq_value_freq(&attribute_stats, value)
}
Bound::Excluded(value) => self.get_attribute_lt_value_freq(
&attribute_stats,
table_id,
attr_base_index,
value,
)?,
Bound::Excluded(value) => {
self.get_attribute_lt_value_freq(
&attribute_stats,
table_id,
attr_base_index,
value,
)
.await?
}
};
assert!(
left_quantile <= right_quantile,
Expand Down
42 changes: 28 additions & 14 deletions optd-cost-model/src/cost/filter/comp_op.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,12 @@ use crate::{
// compute the selectivity.
stats::{DEFAULT_EQ_SEL, DEFAULT_INEQ_SEL, UNIMPLEMENTED_SEL},
CostModelResult,
SemanticError,
};

impl<S: CostModelStorageLayer> CostModelImpl<S> {
/// Comparison operators are the base case for recursion in get_filter_selectivity()
pub(crate) fn get_comp_op_selectivity(
pub(crate) async fn get_comp_op_selectivity(
&self,
comp_bin_op_typ: BinOpType,
left: ArcPredicateNode,
Expand All @@ -30,8 +31,11 @@ impl<S: CostModelStorageLayer> CostModelImpl<S> {

// I intentionally performed moves on left and right. This way, we don't accidentally use
// them after this block
let (attr_ref_exprs, values, non_attr_ref_exprs, is_left_attr_ref) =
self.get_semantic_nodes(left, right)?;
let semantic_res = self.get_semantic_nodes(left, right).await;
if semantic_res.is_err() {
return Ok(Self::get_default_comparison_op_selectivity(comp_bin_op_typ));
}
let (attr_ref_exprs, values, non_attr_ref_exprs, is_left_attr_ref) = semantic_res.unwrap();

// Handle the different cases of semantic nodes.
if attr_ref_exprs.is_empty() {
Expand All @@ -51,13 +55,17 @@ impl<S: CostModelStorageLayer> CostModelImpl<S> {
match comp_bin_op_typ {
BinOpType::Eq => {
self.get_attribute_equality_selectivity(table_id, attr_ref_idx, value, true)
.await
}
BinOpType::Neq => {
self.get_attribute_equality_selectivity(
table_id,
attr_ref_idx,
value,
false,
)
.await
}
BinOpType::Neq => self.get_attribute_equality_selectivity(
table_id,
attr_ref_idx,
value,
false,
),
BinOpType::Lt | BinOpType::Leq | BinOpType::Gt | BinOpType::Geq => {
let start = match (comp_bin_op_typ, is_left_attr_ref) {
(BinOpType::Lt, true) | (BinOpType::Geq, false) => Bound::Unbounded,
Expand All @@ -74,6 +82,7 @@ impl<S: CostModelStorageLayer> CostModelImpl<S> {
_ => unreachable!("all comparison BinOpTypes were enumerated. this should be unreachable"),
};
self.get_attribute_range_selectivity(table_id, attr_ref_idx, start, end)
.await
}
_ => unreachable!(
"all comparison BinOpTypes were enumerated. this should be unreachable"
Expand Down Expand Up @@ -109,7 +118,7 @@ impl<S: CostModelStorageLayer> CostModelImpl<S> {
/// This is convenient to avoid repeating the same logic just with "left" and "right" swapped.
/// The last return value is true when the input node (left) is an AttributeRefPred.
#[allow(clippy::type_complexity)]
fn get_semantic_nodes(
async fn get_semantic_nodes(
&self,
left: ArcPredicateNode,
right: ArcPredicateNode,
Expand Down Expand Up @@ -175,11 +184,16 @@ impl<S: CostModelStorageLayer> CostModelImpl<S> {
// The "invert" cast is to invert the cast so that we're casting the
// non_cast_node to the attribute's original type.
// TODO(migration): double check
let invert_cast_data_type = &(self
// TODO: Consider attribute info is None.
let attribute_info = self
.storage_manager
.get_attribute_info(table_id, attr_ref_idx as i32)?
.typ
.into_data_type());
.get_attribute_info(table_id, attr_ref_idx as i32)
.await?
.ok_or({
SemanticError::AttributeNotFound(table_id, attr_ref_idx as i32)
})?;

let invert_cast_data_type = &attribute_info.typ.into_data_type();

match non_cast_node.typ {
PredicateType::AttributeRef => {
Expand Down
Loading

0 comments on commit 814c3d6

Please sign in to comment.