Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(cost-model): Initial framework of compute cost migration #31

Merged
merged 3 commits into from
Nov 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3,678 changes: 3,678 additions & 0 deletions optd-cost-model/Cargo.lock

Large diffs are not rendered by default.

13 changes: 13 additions & 0 deletions optd-cost-model/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
[package]
name = "optd-cost-model"
version = "0.1.0"
edition = "2021"

[dependencies]
optd-persistent = { path = "../optd-persistent", version = "0.1" }
serde = { version = "1.0", features = ["derive"] }
arrow-schema = "53.2.0"
datafusion-expr = "32.0.0"
ordered-float = "4.0"
chrono = "0.4"

4 changes: 4 additions & 0 deletions optd-cost-model/src/common/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
pub mod nodes;
pub mod predicates;
pub mod types;
pub mod values;
96 changes: 96 additions & 0 deletions optd-cost-model/src/common/nodes.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
use std::sync::Arc;

use arrow_schema::DataType;

use super::{
predicates::{
bin_op_pred::BinOpType, constant_pred::ConstantType, func_pred::FuncType,
log_op_pred::LogOpType, sort_order_pred::SortOrderType, un_op_pred::UnOpType,
},
values::Value,
};

/// The kind of join carried by the join variants of [`PhysicalNodeType`]
/// (`PhysicalHashJoin` and `PhysicalNestedLoopJoin`).
///
/// Discriminants are explicit: `Inner` is pinned to `1`, and every following
/// variant takes the next consecutive value in declaration order.
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
pub enum JoinType {
Inner = 1,
FullOuter,
LeftOuter,
RightOuter,
Cross,
LeftSemi,
RightSemi,
LeftAnti,
RightAnti,
}

/// Tag identifying the kind of a physical node.
///
/// The two join variants carry the [`JoinType`] they perform; every other
/// variant is a plain tag without a payload.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum PhysicalNodeType {
    PhysicalProjection,
    PhysicalFilter,
    PhysicalScan,
    PhysicalSort,
    PhysicalAgg,
    PhysicalHashJoin(JoinType),
    PhysicalNestedLoopJoin(JoinType),
    PhysicalEmptyRelation,
    PhysicalLimit,
}

/// The textual form is exactly the derived [`Debug`](std::fmt::Debug) output,
/// e.g. `PhysicalHashJoin(Inner)`.
impl std::fmt::Display for PhysicalNodeType {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_fmt(format_args!("{self:?}"))
    }
}

/// Tag stored in [`PredicateNode::typ`] that says what a predicate node is.
///
/// Some variants carry a payload that refines the tag (the constant type, the
/// operator, the function, the sort order, or an arrow [`DataType`]); the rest
/// are plain tags.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum PredicateType {
    List,
    Constant(ConstantType),
    AttributeRef,
    ExternAttributeRef,
    UnOp(UnOpType),
    BinOp(BinOpType),
    LogOp(LogOpType),
    Func(FuncType),
    SortOrder(SortOrderType),
    Between,
    Cast,
    Like,
    DataType(DataType),
    InList,
}

/// The textual form is exactly the derived [`Debug`](std::fmt::Debug) output.
impl std::fmt::Display for PredicateType {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_fmt(format_args!("{self:?}"))
    }
}

/// Shared, atomically reference-counted handle to a [`PredicateNode`].
pub type ArcPredicateNode = Arc<PredicateNode>;

/// A node of a predicate tree: a type tag, its child predicates, and an
/// optional attached value.
#[derive(Clone, Debug, Hash, PartialEq, Eq)]
pub struct PredicateNode {
    /// A generic predicate node type
    pub typ: PredicateType,
    /// Child predicate nodes, always materialized
    pub children: Vec<PredicateNode>,
    /// Data associated with the predicate, if any
    pub data: Option<Value>,
}

/// Renders the node as a parenthesised, space-separated list:
/// `(<typ> <child>... <data>)`, where the data part is omitted when `data`
/// is `None`. Children are rendered recursively in order.
impl std::fmt::Display for PredicateNode {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self {
            typ,
            children,
            data,
        } = self;

        write!(f, "({typ}")?;
        // Stops at the first child that fails to format, like `?` in a loop.
        children
            .iter()
            .try_for_each(|child| write!(f, " {child}"))?;
        match data {
            Some(value) => write!(f, " {value})"),
            None => f.write_str(")"),
        }
    }
}
40 changes: 40 additions & 0 deletions optd-cost-model/src/common/predicates/bin_op_pred.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
/// Binary operators, split into two families: numerical (`Add`..`Mod`) and
/// comparison (`Eq`..`Leq`).
///
/// Use [`BinOpType::is_numerical`] / [`BinOpType::is_comparison`] to tell the
/// families apart.
#[derive(Copy, Clone, PartialEq, Eq, Hash, Debug)]
pub enum BinOpType {
    // numerical
    Add,
    Sub,
    Mul,
    Div,
    Mod,

    // comparison
    Eq,
    Neq,
    Gt,
    Lt,
    Geq,
    Leq,
}

/// Prints the bare variant name (the same text as the derived `Debug`),
/// without applying any width/padding flags.
impl std::fmt::Display for BinOpType {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let name = match self {
            Self::Add => "Add",
            Self::Sub => "Sub",
            Self::Mul => "Mul",
            Self::Div => "Div",
            Self::Mod => "Mod",
            Self::Eq => "Eq",
            Self::Neq => "Neq",
            Self::Gt => "Gt",
            Self::Lt => "Lt",
            Self::Geq => "Geq",
            Self::Leq => "Leq",
        };
        f.write_str(name)
    }
}

impl BinOpType {
    /// Returns `true` for the arithmetic operators
    /// (`Add`, `Sub`, `Mul`, `Div`, `Mod`).
    pub fn is_numerical(&self) -> bool {
        // Exhaustive on purpose: a new variant must be classified explicitly.
        match self {
            Self::Add | Self::Sub | Self::Mul | Self::Div | Self::Mod => true,
            Self::Eq | Self::Neq | Self::Gt | Self::Lt | Self::Geq | Self::Leq => false,
        }
    }

    /// Returns `true` for the comparison operators
    /// (`Eq`, `Neq`, `Gt`, `Lt`, `Geq`, `Leq`).
    pub fn is_comparison(&self) -> bool {
        // Exhaustive on purpose: a new variant must be classified explicitly.
        match self {
            Self::Eq | Self::Neq | Self::Gt | Self::Lt | Self::Geq | Self::Leq => true,
            Self::Add | Self::Sub | Self::Mul | Self::Div | Self::Mod => false,
        }
    }
}
21 changes: 21 additions & 0 deletions optd-cost-model/src/common/predicates/constant_pred.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
use serde::{Deserialize, Serialize};

/// Type tag for a constant value; it is the payload of
/// `PredicateType::Constant`.
///
/// Derives serde's `Serialize`/`Deserialize`, so the tag itself can be
/// serialized and deserialized.
#[derive(Copy, Clone, PartialEq, Eq, Hash, Debug, Serialize, Deserialize)]
pub enum ConstantType {
Bool,
Utf8String,
UInt8,
UInt16,
UInt32,
UInt64,
Int8,
Int16,
Int32,
Int64,
Float64,
Date,
IntervalMonthDateNano,
Decimal,
Binary,
}
23 changes: 23 additions & 0 deletions optd-cost-model/src/common/predicates/func_pred.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
/// The function referenced by a function predicate: a DataFusion built-in
/// scalar function, a DataFusion aggregate function, or `Case`.
#[derive(Clone, PartialEq, Eq, Hash, Debug)]
pub enum FuncType {
    Scalar(datafusion_expr::BuiltinScalarFunction),
    Agg(datafusion_expr::AggregateFunction),
    Case,
}

/// The textual form is exactly the derived [`Debug`](std::fmt::Debug) output.
impl std::fmt::Display for FuncType {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_fmt(format_args!("{self:?}"))
    }
}

impl FuncType {
    /// Wraps a built-in scalar function as [`FuncType::Scalar`].
    pub fn new_scalar(func_id: datafusion_expr::BuiltinScalarFunction) -> Self {
        Self::Scalar(func_id)
    }

    /// Wraps an aggregate function as [`FuncType::Agg`].
    pub fn new_agg(func_id: datafusion_expr::AggregateFunction) -> Self {
        Self::Agg(func_id)
    }
}
14 changes: 14 additions & 0 deletions optd-cost-model/src/common/predicates/log_op_pred.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
use std::fmt::Display;

/// Logical connective of a logical-operator predicate: `And` or `Or`.
#[derive(Copy, Clone, PartialEq, Eq, Hash, Debug)]
pub enum LogOpType {
    And,
    Or,
}

/// Prints the bare variant name (the same text as the derived `Debug`),
/// without applying any width/padding flags.
impl Display for LogOpType {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(match self {
            Self::And => "And",
            Self::Or => "Or",
        })
    }
}
6 changes: 6 additions & 0 deletions optd-cost-model/src/common/predicates/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
pub mod bin_op_pred;
pub mod constant_pred;
pub mod func_pred;
pub mod log_op_pred;
pub mod sort_order_pred;
pub mod un_op_pred;
14 changes: 14 additions & 0 deletions optd-cost-model/src/common/predicates/sort_order_pred.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
use std::fmt::Display;

/// Direction of a sort-order predicate: `Asc` or `Desc`.
#[derive(Copy, Clone, PartialEq, Eq, Hash, Debug)]
pub enum SortOrderType {
    Asc,
    Desc,
}

/// Prints the bare variant name (the same text as the derived `Debug`),
/// without applying any width/padding flags.
impl Display for SortOrderType {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(match self {
            Self::Asc => "Asc",
            Self::Desc => "Desc",
        })
    }
}
14 changes: 14 additions & 0 deletions optd-cost-model/src/common/predicates/un_op_pred.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
use std::fmt::Display;

/// Unary operators: `Neg` and `Not`.
///
/// Discriminants are explicit: `Neg` is pinned to `1`, so `Not` is `2`.
#[derive(Copy, Clone, PartialEq, Eq, Hash, Debug)]
pub enum UnOpType {
    Neg = 1,
    Not,
}

/// Prints the bare variant name (the same text as the derived `Debug`),
/// without applying any width/padding flags.
impl Display for UnOpType {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(match self {
            Self::Neg => "Neg",
            Self::Not => "Not",
        })
    }
}
51 changes: 51 additions & 0 deletions optd-cost-model/src/common/types.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
use std::fmt::Display;

/// Identifier of a group (newtype over `usize`). Displayed as `!<n>`.
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Debug, Default, Hash)]
pub struct GroupId(pub usize);

impl Display for GroupId {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self(raw) = self;
        write!(f, "!{raw}")
    }
}

/// Identifier of an expression (newtype over `usize`). Displayed as the bare
/// number.
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Debug, Default, Hash)]
pub struct ExprId(pub usize);

impl Display for ExprId {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self(raw) = self;
        write!(f, "{raw}")
    }
}

/// Identifier of a table (newtype over `usize`). Displayed as `Table#<n>`.
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Debug, Default, Hash)]
pub struct TableId(pub usize);

impl Display for TableId {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self(raw) = self;
        write!(f, "Table#{raw}")
    }
}

/// Identifier of an attribute (newtype over `usize`). Displayed as `Attr#<n>`.
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Debug, Default, Hash)]
pub struct AttrId(pub usize);

impl Display for AttrId {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self(raw) = self;
        write!(f, "Attr#{raw}")
    }
}

/// Identifier of an epoch (newtype over `usize`). Displayed as `Epoch#<n>`.
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Debug, Default, Hash)]
pub struct EpochId(pub usize);

impl Display for EpochId {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self(raw) = self;
        write!(f, "Epoch#{raw}")
    }
}
Loading