Skip to content

Commit

Permalink
1. add compute_cost 2. add todo documentation 3. improve type like ta…
Browse files Browse the repository at this point in the history
…bleid 4. CMSL->S
  • Loading branch information
lanlou1554 committed Nov 14, 2024
1 parent b7034ae commit 9b6f70b
Show file tree
Hide file tree
Showing 12 changed files with 93 additions and 30 deletions.
4 changes: 4 additions & 0 deletions optd-cost-model/src/common/nodes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ use super::{
values::Value,
};

/// TODO: documentation
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
pub enum JoinType {
Inner = 1,
Expand All @@ -23,6 +24,7 @@ pub enum JoinType {
RightAnti,
}

/// TODO: documentation
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum PhysicalNodeType {
PhysicalProjection,
Expand All @@ -42,6 +44,7 @@ impl std::fmt::Display for PhysicalNodeType {
}
}

/// TODO: documentation
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum PredicateType {
List,
Expand All @@ -68,6 +71,7 @@ impl std::fmt::Display for PredicateType {

pub type ArcPredicateNode = Arc<PredicateNode>;

/// TODO: documentation
#[derive(Clone, Debug, Hash, PartialEq, Eq)]
pub struct PredicateNode {
/// A generic predicate node type
Expand Down
1 change: 1 addition & 0 deletions optd-cost-model/src/common/predicates/bin_op_pred.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
/// TODO: documentation
#[derive(Copy, Clone, PartialEq, Eq, Hash, Debug)]
pub enum BinOpType {
// numerical
Expand Down
1 change: 1 addition & 0 deletions optd-cost-model/src/common/predicates/constant_pred.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
use serde::{Deserialize, Serialize};

/// TODO: documentation
#[derive(Copy, Clone, PartialEq, Eq, Hash, Debug, Serialize, Deserialize)]
pub enum ConstantType {
Bool,
Expand Down
1 change: 1 addition & 0 deletions optd-cost-model/src/common/predicates/func_pred.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
/// TODO: documentation
#[derive(Clone, PartialEq, Eq, Hash, Debug)]
pub enum FuncType {
Scalar(datafusion_expr::BuiltinScalarFunction),
Expand Down
1 change: 1 addition & 0 deletions optd-cost-model/src/common/predicates/log_op_pred.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
use std::fmt::Display;

/// TODO: documentation
#[derive(Copy, Clone, PartialEq, Eq, Hash, Debug)]
pub enum LogOpType {
And,
Expand Down
1 change: 1 addition & 0 deletions optd-cost-model/src/common/predicates/sort_order_pred.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
use std::fmt::Display;

/// TODO: documentation
#[derive(Copy, Clone, PartialEq, Eq, Hash, Debug)]
pub enum SortOrderType {
Asc,
Expand Down
1 change: 1 addition & 0 deletions optd-cost-model/src/common/predicates/un_op_pred.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
use std::fmt::Display;

/// TODO: documentation
#[derive(Copy, Clone, PartialEq, Eq, Hash, Debug)]
pub enum UnOpType {
Neg = 1,
Expand Down
29 changes: 26 additions & 3 deletions optd-cost-model/src/common/types.rs
Original file line number Diff line number Diff line change
@@ -1,13 +1,24 @@
use std::fmt::Display;

/// Newtype wrapper around a `usize` that serves as a group identifier.
///
/// The wrapped value is public and reachable as `.0`. The derives make the
/// id `Copy`, comparable, totally ordered, hashable, and defaulting to
/// `GroupId(0)`.
///
/// NOTE(review): what a "group" denotes is not visible from this file —
/// TODO: describe it here.
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Debug, Default, Hash)]
pub struct GroupId(pub usize);

/// Newtype wrapper around a `usize` that serves as an expression identifier.
///
/// The wrapped value is public and reachable as `.0`. The derives make the
/// id `Copy`, comparable, totally ordered, hashable, and defaulting to
/// `ExprId(0)`.
///
/// NOTE(review): which kind of expression this identifies is not visible
/// from this file — TODO: describe it here.
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Debug, Default, Hash)]
pub struct ExprId(pub usize);

/// TODO: documentation
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Debug, Default, Hash)]
pub struct PredId(pub usize);
pub struct TableId(pub usize);

/// Newtype wrapper around a `usize` that serves as an attribute identifier.
///
/// The wrapped value is public and reachable as `.0`. The derives make the
/// id `Copy`, comparable, totally ordered, hashable, and defaulting to
/// `AttrId(0)`.
///
/// NOTE(review): presumably identifies an attribute (column) known to the
/// storage layer — TODO: confirm and document the id's scope.
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Debug, Default, Hash)]
pub struct AttrId(pub usize);

/// Newtype wrapper around a `usize` that serves as an epoch identifier.
///
/// The wrapped value is public and reachable as `.0`. The derives make the
/// id `Copy`, comparable, totally ordered, hashable, and defaulting to
/// `EpochId(0)`.
///
/// NOTE(review): presumably identifies a statistics/cost epoch in the
/// storage layer — TODO: confirm and document what an epoch is.
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Debug, Default, Hash)]
pub struct EpochId(pub usize);

impl Display for GroupId {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
Expand All @@ -21,8 +32,20 @@ impl Display for ExprId {
}
}

impl Display for PredId {
impl Display for TableId {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "Table#{}", self.0)
}
}

/// Renders an `AttrId` as `Attr#<n>`, where `<n>` is the wrapped `usize`.
impl Display for AttrId {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "Attr#{}", self.0)
}
}

impl Display for EpochId {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "P{}", self.0)
write!(f, "Epoch#{}", self.0)
}
}
5 changes: 5 additions & 0 deletions optd-cost-model/src/common/values.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,11 @@ use ordered_float::OrderedFloat;
use serde::{Deserialize, Deserializer, Serialize, Serializer};
use std::sync::Arc;

/// Newtype wrapper around `OrderedFloat<f64>`.
///
/// The wrapped value is public and reachable as `.0`. Equality, ordering and
/// hashing are derived, so they delegate to the wrapped `OrderedFloat<f64>`.
/// `Serialize` and `Deserialize` are not derived; they are implemented by
/// hand for this type further down in this file.
///
/// NOTE(review): presumably exists so float values can appear in types that
/// derive `Eq`/`Hash`/`Ord` together with serde traits — TODO: confirm.
#[derive(Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub struct SerializableOrderedF64(pub OrderedFloat<f64>);

/// TODO: documentation
impl Serialize for SerializableOrderedF64 {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
Expand All @@ -17,6 +19,7 @@ impl Serialize for SerializableOrderedF64 {
}
}

/// TODO: documentation
impl<'de> Deserialize<'de> for SerializableOrderedF64 {
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
Expand All @@ -28,6 +31,7 @@ impl<'de> Deserialize<'de> for SerializableOrderedF64 {
}
}

/// TODO: documentation
#[derive(Clone, Debug, PartialEq, Eq, Hash, Serialize, Deserialize, PartialOrd, Ord)]
pub enum Value {
UInt8(u8),
Expand Down Expand Up @@ -69,6 +73,7 @@ impl std::fmt::Display for Value {
}
}

/// TODO: documentation
/// The `as_*()` functions do not perform conversions. This is *unlike* the `as`
/// keyword in rust.
///
Expand Down
41 changes: 26 additions & 15 deletions optd-cost-model/src/cost_model.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,20 +10,22 @@ use optd_persistent::{
use crate::{
common::{
nodes::{ArcPredicateNode, PhysicalNodeType},
types::ExprId,
types::{AttrId, EpochId, ExprId, TableId},
},
storage::CostModelStorageManager,
ComputeCostContext, CostModel, CostModelResult, EstimatedStatistic,
ComputeCostContext, Cost, CostModel, CostModelResult, EstimatedStatistic, StatValue,
};

pub struct CostModelImpl<CMSL: CostModelStorageLayer> {
storage_manager: CostModelStorageManager<CMSL>,
/// TODO: documentation
pub struct CostModelImpl<S: CostModelStorageLayer> {
storage_manager: CostModelStorageManager<S>,
default_catalog_source: CatalogSource,
}

impl<CMSL: CostModelStorageLayer> CostModelImpl<CMSL> {
impl<S: CostModelStorageLayer> CostModelImpl<S> {
/// TODO: documentation
pub fn new(
storage_manager: CostModelStorageManager<CMSL>,
storage_manager: CostModelStorageManager<S>,
default_catalog_source: CatalogSource,
) -> Self {
Self {
Expand All @@ -33,7 +35,17 @@ impl<CMSL: CostModelStorageLayer> CostModelImpl<CMSL> {
}
}

impl<CMSL: CostModelStorageLayer + std::marker::Sync + 'static> CostModel for CostModelImpl<CMSL> {
impl<S: CostModelStorageLayer + std::marker::Sync + 'static> CostModel for CostModelImpl<S> {
// Stub: the body is `todo!()`, so calling this method panics at runtime
// until a real implementation is written. None of the parameters are read.
fn compute_operation_cost(
&self,
node: &PhysicalNodeType,
predicates: &[ArcPredicateNode],
children_stats: &[Option<&EstimatedStatistic>],
context: Option<ComputeCostContext>,
) -> CostModelResult<Cost> {
todo!()
}

fn derive_statistics(
&self,
node: PhysicalNodeType,
Expand All @@ -56,27 +68,26 @@ impl<CMSL: CostModelStorageLayer + std::marker::Sync + 'static> CostModel for Co
fn get_table_statistic_for_analysis(
&self,
// TODO: i32 should be changed to TableId.
table_id: i32,
table_id: TableId,
stat_type: StatType,
epoch_id: Option<i32>,
) -> CostModelResult<Option<crate::StatValue>> {
epoch_id: Option<EpochId>,
) -> CostModelResult<Option<StatValue>> {
todo!()
}

fn get_attribute_statistic_for_analysis(
&self,
// TODO: i32 should be changed to AttrId or EpochId.
attr_ids: Vec<i32>,
attr_ids: Vec<AttrId>,
stat_type: StatType,
epoch_id: Option<i32>,
) -> CostModelResult<Option<crate::StatValue>> {
epoch_id: Option<EpochId>,
) -> CostModelResult<Option<StatValue>> {
todo!()
}

fn get_cost_for_analysis(
&self,
expr_id: ExprId,
epoch_id: Option<i32>,
epoch_id: Option<EpochId>,
) -> CostModelResult<Option<crate::Cost>> {
todo!()
}
Expand Down
28 changes: 20 additions & 8 deletions optd-cost-model/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use common::{
nodes::{ArcPredicateNode, PhysicalNodeType},
types::{ExprId, GroupId},
types::{AttrId, EpochId, ExprId, GroupId, TableId},
};
use optd_persistent::cost_model::interface::{Stat, StatType};

Expand Down Expand Up @@ -38,9 +38,20 @@ pub enum CostModelError {
}

pub trait CostModel: 'static + Send + Sync {
/// Returns a [`Cost`] for the given physical node type, wrapped in a
/// [`CostModelResult`].
///
/// # Parameters
/// * `node` - the physical node type to compute a cost for.
/// * `predicates` - predicate nodes passed alongside the node type.
/// * `children_stats` - a slice of optional [`EstimatedStatistic`]
///   references; an entry may be `None`.
/// * `context` - an optional [`ComputeCostContext`].
///
/// NOTE(review): presumably `children_stats` holds one entry per child of
/// the node, and the returned cost covers only this operation rather than
/// its whole subtree — TODO: confirm and state the contract, including
/// which errors can be returned.
fn compute_operation_cost(
&self,
node: &PhysicalNodeType,
predicates: &[ArcPredicateNode],
children_stats: &[Option<&EstimatedStatistic>],
context: Option<ComputeCostContext>,
) -> CostModelResult<Cost>;

/// TODO: documentation
/// It is for cardinality estimation. The output should be the estimated
/// statistic calculated by the cost model.
/// TODO: Consider make it a helper function, so we can store Cost in the
/// ORM more easily.
fn derive_statistics(
&self,
node: PhysicalNodeType,
Expand All @@ -59,25 +70,26 @@ pub trait CostModel: 'static + Send + Sync {
data: String,
) -> CostModelResult<()>;

/// TODO: documentation
fn get_table_statistic_for_analysis(
&self,
// TODO: i32 should be changed to TableId.
table_id: i32,
table_id: TableId,
stat_type: StatType,
epoch_id: Option<i32>,
epoch_id: Option<EpochId>,
) -> CostModelResult<Option<StatValue>>;

/// TODO: documentation
fn get_attribute_statistic_for_analysis(
&self,
// TODO: i32 should be changed to AttrId or EpochId.
attr_ids: Vec<i32>,
attr_ids: Vec<AttrId>,
stat_type: StatType,
epoch_id: Option<i32>,
epoch_id: Option<EpochId>,
) -> CostModelResult<Option<StatValue>>;

/// TODO: documentation
fn get_cost_for_analysis(
&self,
expr_id: ExprId,
epoch_id: Option<i32>,
epoch_id: Option<EpochId>,
) -> CostModelResult<Option<Cost>>;
}
10 changes: 6 additions & 4 deletions optd-cost-model/src/storage.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,15 @@ use std::sync::Arc;

use optd_persistent::CostModelStorageLayer;

pub struct CostModelStorageManager<CMSL: CostModelStorageLayer> {
pub backend_manager: Arc<CMSL>,
/// TODO: documentation
pub struct CostModelStorageManager<S: CostModelStorageLayer> {
pub backend_manager: Arc<S>,
// TODO: in-memory cache
}

impl<CMSL: CostModelStorageLayer> CostModelStorageManager<CMSL> {
pub fn new(backend_manager: Arc<CMSL>) -> Self {
impl<S: CostModelStorageLayer> CostModelStorageManager<S> {
/// TODO: documentation
pub fn new(backend_manager: Arc<S>) -> Self {
Self { backend_manager }
}
}

0 comments on commit 9b6f70b

Please sign in to comment.