Resolve conflict with main

cmu-db · Nov 16, 2024 · 814c3d6 · 814c3d6
1 parent 5c5a40f
commit 814c3d6
Show file tree

Hide file tree

Showing 14 changed files with 249 additions and 144 deletions.
diff --git a/Cargo.lock b/Cargo.lock
diff --git a/optd-cost-model/src/cost/agg.rs b/optd-cost-model/src/cost/agg.rs
@@ -8,11 +8,11 @@ use crate::{
     },
     cost_model::CostModelImpl,
     stats::DEFAULT_NUM_DISTINCT,
-    CostModelError, CostModelResult, EstimatedStatistic,
+    CostModelError, CostModelResult, EstimatedStatistic, SemanticError,
 };
 
 impl<S: CostModelStorageLayer> CostModelImpl<S> {
-    pub fn get_agg_row_cnt(
+    pub async fn get_agg_row_cnt(
         &self,
         group_by: ArcPredicateNode,
     ) -> CostModelResult<EstimatedStatistic> {
@@ -22,22 +22,24 @@ impl<S: CostModelStorageLayer> CostModelImpl<S> {
         } else {
             // Multiply the n-distinct of all the group by columns.
             // TODO: improve with multi-dimensional n-distinct
-            let row_cnt = group_by.0.children.iter().try_fold(1, |acc, node| {
+            let mut row_cnt = 1;
+
+            for node in &group_by.0.children {
                 match node.typ {
                     PredicateType::AttributeRef => {
                         let attr_ref =
                             AttributeRefPred::from_pred_node(node.clone()).ok_or_else(|| {
-                                CostModelError::InvalidPredicate(
+                                SemanticError::InvalidPredicate(
                                     "Expected AttributeRef predicate".to_string(),
                                 )
                             })?;
                         if attr_ref.is_derived() {
-                            Ok(acc * DEFAULT_NUM_DISTINCT)
+                            row_cnt *= DEFAULT_NUM_DISTINCT;
                         } else {
                             let table_id = attr_ref.table_id();
                             let attr_idx = attr_ref.attr_index();
                             let stats_option =
-                                self.get_attribute_comb_stats(table_id, &[attr_idx])?;
+                                self.get_attribute_comb_stats(table_id, &[attr_idx]).await?;
 
                             let ndistinct = match stats_option {
                                 Some(stats) => stats.ndistinct,
@@ -46,15 +48,15 @@ impl<S: CostModelStorageLayer> CostModelImpl<S> {
                                     DEFAULT_NUM_DISTINCT
                                 }
                             };
-                            Ok(acc * ndistinct)
+                            row_cnt *= ndistinct;
                         }
                     }
                     _ => {
                         // TODO: Consider the case where `GROUP BY 1`.
-                        panic!("GROUP BY must have attribute ref predicate")
+                        panic!("GROUP BY must have attribute ref predicate");
                     }
                 }
-            })?;
+            }
             Ok(EstimatedStatistic(row_cnt))
         }
     }

diff --git a/optd-cost-model/src/cost/filter/attribute.rs b/optd-cost-model/src/cost/filter/attribute.rs
@@ -19,7 +19,7 @@ impl<S: CostModelStorageLayer> CostModelImpl<S> {
     /// Also, get_attribute_equality_selectivity is a subroutine when computing range
     /// selectivity, which is another reason for separating these into two functions
     /// is_eq means whether it's == or !=
-    pub(crate) fn get_attribute_equality_selectivity(
+    pub(crate) async fn get_attribute_equality_selectivity(
         &self,
         table_id: TableId,
         attr_base_index: usize,
@@ -28,8 +28,9 @@ impl<S: CostModelStorageLayer> CostModelImpl<S> {
     ) -> CostModelResult<f64> {
         // TODO: The attribute could be a derived attribute
         let ret_sel = {
-            if let Some(attribute_stats) =
-                self.get_attribute_comb_stats(table_id, &[attr_base_index])?
+            if let Some(attribute_stats) = self
+                .get_attribute_comb_stats(table_id, &[attr_base_index])
+                .await?
             {
                 let eq_freq =
                     if let Some(freq) = attribute_stats.mcvs.freq(&vec![Some(value.clone())]) {
@@ -91,7 +92,7 @@ impl<S: CostModelStorageLayer> CostModelImpl<S> {
     }
 
     /// Compute the frequency of values in an attribute less than the given value.
-    fn get_attribute_lt_value_freq(
+    async fn get_attribute_lt_value_freq(
         &self,
         attribute_stats: &AttributeCombValueStats,
         table_id: TableId,
@@ -102,7 +103,9 @@ impl<S: CostModelStorageLayer> CostModelImpl<S> {
         // into total_leq_cdf this logic just so happens to be the exact same logic as
         // get_attribute_equality_selectivity implements
         let ret_freq = Self::get_attribute_leq_value_freq(attribute_stats, value)
-            - self.get_attribute_equality_selectivity(table_id, attr_base_index, value, true)?;
+            - self
+                .get_attribute_equality_selectivity(table_id, attr_base_index, value, true)
+                .await?;
         assert!(
             (0.0..=1.0).contains(&ret_freq),
             "ret_freq ({}) should be in [0, 1]",
@@ -116,25 +119,29 @@ impl<S: CostModelStorageLayer> CostModelImpl<S> {
     /// Range predicates are handled entirely differently from equality predicates so this is its
     /// own function. If it is unable to find the statistics, it returns DEFAULT_INEQ_SEL.
     /// The selectivity is computed as quantile of the right bound minus quantile of the left bound.
-    pub(crate) fn get_attribute_range_selectivity(
+    pub(crate) async fn get_attribute_range_selectivity(
         &self,
         table_id: TableId,
         attr_base_index: usize,
         start: Bound<&Value>,
         end: Bound<&Value>,
     ) -> CostModelResult<f64> {
         // TODO: Consider attribute is a derived attribute
-        if let Some(attribute_stats) =
-            self.get_attribute_comb_stats(table_id, &[attr_base_index])?
+        if let Some(attribute_stats) = self
+            .get_attribute_comb_stats(table_id, &[attr_base_index])
+            .await?
         {
             let left_quantile = match start {
                 Bound::Unbounded => 0.0,
-                Bound::Included(value) => self.get_attribute_lt_value_freq(
-                    &attribute_stats,
-                    table_id,
-                    attr_base_index,
-                    value,
-                )?,
+                Bound::Included(value) => {
+                    self.get_attribute_lt_value_freq(
+                        &attribute_stats,
+                        table_id,
+                        attr_base_index,
+                        value,
+                    )
+                    .await?
+                }
                 Bound::Excluded(value) => {
                     Self::get_attribute_leq_value_freq(&attribute_stats, value)
                 }
@@ -144,12 +151,15 @@ impl<S: CostModelStorageLayer> CostModelImpl<S> {
                 Bound::Included(value) => {
                     Self::get_attribute_leq_value_freq(&attribute_stats, value)
                 }
-                Bound::Excluded(value) => self.get_attribute_lt_value_freq(
-                    &attribute_stats,
-                    table_id,
-                    attr_base_index,
-                    value,
-                )?,
+                Bound::Excluded(value) => {
+                    self.get_attribute_lt_value_freq(
+                        &attribute_stats,
+                        table_id,
+                        attr_base_index,
+                        value,
+                    )
+                    .await?
+                }
             };
             assert!(
                 left_quantile <= right_quantile,

diff --git a/optd-cost-model/src/cost/filter/comp_op.rs b/optd-cost-model/src/cost/filter/comp_op.rs
@@ -16,11 +16,12 @@ use crate::{
     // compute the selectivity.
     stats::{DEFAULT_EQ_SEL, DEFAULT_INEQ_SEL, UNIMPLEMENTED_SEL},
     CostModelResult,
+    SemanticError,
 };
 
 impl<S: CostModelStorageLayer> CostModelImpl<S> {
     /// Comparison operators are the base case for recursion in get_filter_selectivity()
-    pub(crate) fn get_comp_op_selectivity(
+    pub(crate) async fn get_comp_op_selectivity(
         &self,
         comp_bin_op_typ: BinOpType,
         left: ArcPredicateNode,
@@ -30,8 +31,11 @@ impl<S: CostModelStorageLayer> CostModelImpl<S> {
 
         // I intentionally performed moves on left and right. This way, we don't accidentally use
         // them after this block
-        let (attr_ref_exprs, values, non_attr_ref_exprs, is_left_attr_ref) =
-            self.get_semantic_nodes(left, right)?;
+        let semantic_res = self.get_semantic_nodes(left, right).await;
+        if semantic_res.is_err() {
+            return Ok(Self::get_default_comparison_op_selectivity(comp_bin_op_typ));
+        }
+        let (attr_ref_exprs, values, non_attr_ref_exprs, is_left_attr_ref) = semantic_res.unwrap();
 
         // Handle the different cases of semantic nodes.
         if attr_ref_exprs.is_empty() {
@@ -51,13 +55,17 @@ impl<S: CostModelStorageLayer> CostModelImpl<S> {
                 match comp_bin_op_typ {
                     BinOpType::Eq => {
                         self.get_attribute_equality_selectivity(table_id, attr_ref_idx, value, true)
+                            .await
+                    }
+                    BinOpType::Neq => {
+                        self.get_attribute_equality_selectivity(
+                            table_id,
+                            attr_ref_idx,
+                            value,
+                            false,
+                        )
+                        .await
                     }
-                    BinOpType::Neq => self.get_attribute_equality_selectivity(
-                        table_id,
-                        attr_ref_idx,
-                        value,
-                        false,
-                    ),
                     BinOpType::Lt | BinOpType::Leq | BinOpType::Gt | BinOpType::Geq => {
                         let start = match (comp_bin_op_typ, is_left_attr_ref) {
                                 (BinOpType::Lt, true) | (BinOpType::Geq, false) => Bound::Unbounded,
@@ -74,6 +82,7 @@ impl<S: CostModelStorageLayer> CostModelImpl<S> {
                                 _ => unreachable!("all comparison BinOpTypes were enumerated. this should be unreachable"),
                             };
                         self.get_attribute_range_selectivity(table_id, attr_ref_idx, start, end)
+                            .await
                     }
                     _ => unreachable!(
                         "all comparison BinOpTypes were enumerated. this should be unreachable"
@@ -109,7 +118,7 @@ impl<S: CostModelStorageLayer> CostModelImpl<S> {
     /// This is convenient to avoid repeating the same logic just with "left" and "right" swapped.
     /// The last return value is true when the input node (left) is an AttributeRefPred.
     #[allow(clippy::type_complexity)]
-    fn get_semantic_nodes(
+    async fn get_semantic_nodes(
         &self,
         left: ArcPredicateNode,
         right: ArcPredicateNode,
@@ -175,11 +184,16 @@ impl<S: CostModelStorageLayer> CostModelImpl<S> {
                         // The "invert" cast is to invert the cast so that we're casting the
                         // non_cast_node to the attribute's original type.
                         // TODO(migration): double check
-                        let invert_cast_data_type = &(self
+                        // TODO: Consider attribute info is None.
+                        let attribute_info = self
                             .storage_manager
-                            .get_attribute_info(table_id, attr_ref_idx as i32)?
-                            .typ
-                            .into_data_type());
+                            .get_attribute_info(table_id, attr_ref_idx as i32)
+                            .await?
+                            .ok_or({
+                                SemanticError::AttributeNotFound(table_id, attr_ref_idx as i32)
+                            })?;
+
+                        let invert_cast_data_type = &attribute_info.typ.into_data_type();
 
                         match non_cast_node.typ {
                             PredicateType::AttributeRef => {