Skip to content

Commit

Permalink
add memo trait
Browse files Browse the repository at this point in the history
This commit adds a `Memo` trait and a first draft of an implementation
of the `Memo` trait via the backend ORM-mapped database.

move things into `memo` submodule

change to `BackendManager`

merge with cost model trait and implementation

Quick format fix

make columns nullable for cost model (#21)

add doc for cost model migrators

Add mock catalog

Fix some chores

add memo trait

move things into `memo` submodule

change to `BackendManager`

merge with cost model trait and implementation

fix duplicate def
  • Loading branch information
connortsui20 committed Nov 12, 2024
1 parent e1cba24 commit 9b74ff7
Show file tree
Hide file tree
Showing 6 changed files with 467 additions and 2 deletions.
33 changes: 31 additions & 2 deletions optd-persistent/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,9 @@ mod migrator;
pub mod cost_model;
pub use cost_model::interface::CostModelStorageLayer;

mod memo;
pub use memo::interface::Memo;

/// The filename of the SQLite database for migration.
pub const DATABASE_FILENAME: &str = "sqlite.db";
/// The URL of the SQLite database for migration.
Expand All @@ -39,18 +42,29 @@ fn get_sqlite_url(file: &str) -> String {
format!("sqlite:{}?mode=rwc", file)
}

// NOTE(review): this alias is defined a second time further down (after the `From` impls). This
// occurrence looks like the pre-change side of the diff; only one definition can remain — confirm.
pub type StorageResult<T> = Result<T, BackendError>;

/// The different kinds of errors that might occur while running cost model operations.
///
/// Implements [`std::fmt::Display`] and [`std::error::Error`] so that it can be used with `?`
/// into boxed errors, reported to users, and chained by other error types.
#[derive(Debug)]
pub enum CostModelError {
    // TODO: Add more error types
    /// A statistic type was not recognized (meaning inferred from the variant name; the code that
    /// raises it is not in this file).
    UnknownStatisticType,
    /// A versioned statistic could not be found (meaning inferred from the variant name; the code
    /// that raises it is not in this file).
    VersionedStatisticNotFound,
}

impl std::fmt::Display for CostModelError {
    /// Writes a short, lowercase, human-readable description of the error.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Exhaustive on purpose: adding a variant must force a decision about its message.
        let message = match self {
            Self::UnknownStatisticType => "unknown statistic type",
            Self::VersionedStatisticNotFound => "versioned statistic not found",
        };
        f.write_str(message)
    }
}

// No variant wraps another error, so the default `source()` (returning `None`) is correct.
impl std::error::Error for CostModelError {}

/// The different kinds of errors that might occur while running operations on a memo table.
///
/// TODO convert this to `thiserror`
#[derive(Debug)]
pub enum MemoError {
/// The requested group does not exist.
UnknownGroup,
/// The requested logical expression does not exist.
UnknownLogicalExpression,
/// The requested physical expression does not exist.
UnknownPhysicalExpression,
/// NOTE(review): presumably an expression that failed validation; nothing in this file
/// produces this variant, so confirm the exact meaning.
InvalidExpression,
/// An error coming from the database layer; built from a `DbErr` via the `From<DbErr>` impl.
Database(DbErr),
}

/// The top-level error type of the storage layer; it is the error type of [`StorageResult`].
///
/// Each variant wraps the error of one subsystem, and the `From` impls below let `?` convert
/// those errors into a `BackendError`.
///
/// TODO convert this to `thiserror`
#[derive(Debug)]
pub enum BackendError {
/// An error from the cost model (see [`CostModelError`]).
CostModel(CostModelError),
/// An error from the memo table (see [`MemoError`]).
Memo(MemoError),
/// An error coming from the database layer.
Database(DbErr),
// TODO: Add other variants as needed for different error types
}
Expand All @@ -61,12 +75,27 @@ impl From<CostModelError> for BackendError {
}
}

impl From<MemoError> for BackendError {
fn from(value: MemoError) -> Self {
BackendError::Memo(value)
}
}

impl From<DbErr> for BackendError {
fn from(value: DbErr) -> Self {
BackendError::Database(value)
}
}

impl From<DbErr> for MemoError {
fn from(value: DbErr) -> Self {
MemoError::Database(value)
}
}

/// A type alias for a result with [`BackendError`] as the error type.
///
/// Every method of the [`Memo`] trait returns this type.
pub type StorageResult<T> = Result<T, BackendError>;

/// A handle around the database connection (`db`).
///
/// NOTE(review): presumably the type on which the storage-layer traits are implemented; those
/// implementations are not visible in this file — confirm.
pub struct BackendManager {
// The underlying database connection. The field is not `pub`.
db: DatabaseConnection,
}
Expand Down
11 changes: 11 additions & 0 deletions optd-persistent/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,17 @@ use optd_persistent::DATABASE_URL;

/// Entry point: runs the demos one after another under `#[tokio::main]`.
///
/// `memo_demo` currently ends in `todo!()`, so the program panics once `basic_demo` has finished.
#[tokio::main]
async fn main() {
basic_demo().await;
memo_demo().await;
}

/// Placeholder demo for the memo table.
///
/// Connects to the database at `DATABASE_URL` (panicking if the connection fails) and then hits
/// `todo!()`, so calling this function always panics for now.
async fn memo_demo() {
// The connection is not used yet, hence the underscore-prefixed binding.
let _db = Database::connect(DATABASE_URL).await.unwrap();

todo!()
}

async fn basic_demo() {
let db = Database::connect(DATABASE_URL).await.unwrap();

// Create a new `CascadesGroup`.
Expand Down
73 changes: 73 additions & 0 deletions optd-persistent/src/memo/expression.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
use crate::entities::*;
use std::hash::{DefaultHasher, Hash, Hasher};

/// All of the different types of fixed logical operators.
///
/// Note that there could be more operators that the memo table must support that are not enumerated
/// in this enum, as there can be up to `2^16` different types of operators.
///
/// A variant is cast to `i16` to produce the `variant_tag` of a logical expression, so the
/// discriminants are written out explicitly: reordering or inserting variants must never silently
/// renumber tags that have already been handed out. Give every new variant a fresh, unused value.
#[derive(Debug, Clone, Copy, Hash, Eq, PartialEq)]
#[non_exhaustive]
#[repr(i16)]
pub enum LogicalOperator {
    Scan = 0,
    Join = 1,
}

/// All of the different types of fixed physical operators.
///
/// Note that there could be more operators that the memo table must support that are not enumerated
/// in this enum, as there can be up to `2^16` different types of operators.
///
/// A variant is cast to `i16` to produce the `variant_tag` of a physical expression, so the
/// discriminants are written out explicitly: reordering or inserting variants must never silently
/// renumber tags that have already been handed out. Give every new variant a fresh, unused value.
#[derive(Debug, Clone, Copy, Hash, Eq, PartialEq)]
#[non_exhaustive]
#[repr(i16)]
pub enum PhysicalOperator {
    TableScan = 0,
    IndexScan = 1,
    NestedLoopJoin = 2,
    HashJoin = 3,
}

/// Computes the fingerprint of an expression: a hash over its operator tag followed by its data.
///
/// Two expressions with the same tag and the same data get the same fingerprint, which makes it
/// a cheap first check for whether two expressions are equivalent.
///
/// NOTE(review): the standard library does not specify `DefaultHasher`'s algorithm and allows it
/// to change between releases. If fingerprints are stored and compared across builds, confirm
/// that this is acceptable.
///
/// TODO actually make efficient.
fn fingerprint(variant_tag: i16, data: &serde_json::Value) -> i64 {
    let mut state = DefaultHasher::new();

    // Feed the tag first and the payload second; the order is part of the fingerprint.
    Hash::hash(&variant_tag, &mut state);
    Hash::hash(data, &mut state);

    // Reinterpret the 64-bit digest as a signed integer (same bits, no truncation).
    let digest: u64 = state.finish();
    digest as i64
}

impl logical_expression::Model {
    /// Builds a logical expression from an operator and its data.
    ///
    /// `id` and `group_id` are not known yet and are both left at `0`. The `fingerprint` field
    /// is computed from the operator's `i16` tag and `data`.
    pub fn new(variant_tag: LogicalOperator, data: serde_json::Value) -> Self {
        // From here on only the numeric tag is needed, so shadow the operator with it.
        let variant_tag = variant_tag as i16;

        Self {
            id: 0,
            group_id: 0,
            // Evaluated before `data` is moved into the struct below.
            fingerprint: fingerprint(variant_tag, &data),
            variant_tag,
            data,
        }
    }
}

impl physical_expression::Model {
    /// Builds a physical expression from an operator and its data.
    ///
    /// `id` and `group_id` are not known yet and are both left at `0`. The `fingerprint` field
    /// is computed from the operator's `i16` tag and `data`.
    pub fn new(variant_tag: PhysicalOperator, data: serde_json::Value) -> Self {
        // From here on only the numeric tag is needed, so shadow the operator with it.
        let variant_tag = variant_tag as i16;

        Self {
            id: 0,
            group_id: 0,
            // Evaluated before `data` is moved into the struct below.
            fingerprint: fingerprint(variant_tag, &data),
            variant_tag,
            data,
        }
    }
}
138 changes: 138 additions & 0 deletions optd-persistent/src/memo/interface.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
use crate::StorageResult;

/// A trait representing an implementation of a memoization table.
///
/// Note that we use [`trait_variant`] here in order to add a `Send` bound to the future returned
/// by every `async fn` in the trait.
/// See this [blog post](
/// https://blog.rust-lang.org/2023/12/21/async-fn-rpit-in-traits.html#async-fn-in-public-traits)
/// for more information.
///
/// Every method returns a [`StorageResult`]; the error named in each method's documentation is
/// a variant of [`MemoError`](crate::MemoError).
///
/// TODO Figure out for each when to get the ID of a record or the entire record itself.
#[trait_variant::make(Send)]
pub trait Memo {
/// A type representing a group in the Cascades framework.
type Group;
/// A type representing a unique identifier for a group.
type GroupId;
/// A type representing a logical expression.
type LogicalExpression;
/// A type representing a unique identifier for a logical expression.
type LogicalExpressionId;
/// A type representing a physical expression.
type PhysicalExpression;
/// A type representing a unique identifier for a physical expression.
type PhysicalExpressionId;

/// Retrieves a [`Self::Group`] given a [`Self::GroupId`].
///
/// If the group does not exist, returns a
/// [`MemoError::UnknownGroup`](crate::MemoError::UnknownGroup) error.
async fn get_group(&self, group_id: Self::GroupId) -> StorageResult<Self::Group>;

/// Retrieves all groups that are stored in the memo table.
async fn get_all_groups(&self) -> StorageResult<Vec<Self::Group>>;

/// Retrieves a [`Self::LogicalExpression`] given a [`Self::LogicalExpressionId`].
///
/// If the logical expression does not exist, returns a
/// [`MemoError::UnknownLogicalExpression`](crate::MemoError::UnknownLogicalExpression) error.
async fn get_logical_expression(
&self,
logical_expression_id: Self::LogicalExpressionId,
) -> StorageResult<Self::LogicalExpression>;

/// Retrieves a [`Self::PhysicalExpression`] given a [`Self::PhysicalExpressionId`].
///
/// If the physical expression does not exist, returns a
/// [`MemoError::UnknownPhysicalExpression`](crate::MemoError::UnknownPhysicalExpression) error.
async fn get_physical_expression(
&self,
physical_expression_id: Self::PhysicalExpressionId,
) -> StorageResult<Self::PhysicalExpression>;

/// Retrieves the parent group ID of a logical expression given its expression ID.
///
/// If the logical expression does not exist, returns a
/// [`MemoError::UnknownLogicalExpression`](crate::MemoError::UnknownLogicalExpression) error.
async fn get_group_from_logical_expression(
&self,
logical_expression_id: Self::LogicalExpressionId,
) -> StorageResult<Self::GroupId>;

/// Retrieves the parent group ID of a physical expression given its expression ID.
///
/// If the physical expression does not exist, returns a
/// [`MemoError::UnknownPhysicalExpression`](crate::MemoError::UnknownPhysicalExpression) error.
async fn get_group_from_physical_expression(
&self,
physical_expression_id: Self::PhysicalExpressionId,
) -> StorageResult<Self::GroupId>;

/// Retrieves all of the logical expression "children" of a group.
///
/// If the group does not exist, returns a
/// [`MemoError::UnknownGroup`](crate::MemoError::UnknownGroup) error.
async fn get_group_logical_expressions(
&self,
group_id: Self::GroupId,
) -> StorageResult<Vec<Self::LogicalExpression>>;

/// Retrieves all of the physical expression "children" of a group.
///
/// If the group does not exist, returns a
/// [`MemoError::UnknownGroup`](crate::MemoError::UnknownGroup) error.
async fn get_group_physical_expressions(
&self,
group_id: Self::GroupId,
) -> StorageResult<Vec<Self::PhysicalExpression>>;

/// Retrieves the ID of the best physical query plan (winner) for a given group, or `None` if
/// the group has no winner.
///
/// If the group does not exist, returns a
/// [`MemoError::UnknownGroup`](crate::MemoError::UnknownGroup) error.
async fn get_winner(
&self,
group_id: Self::GroupId,
) -> StorageResult<Option<Self::PhysicalExpressionId>>;

/// Updates / replaces a group's best physical plan (winner). Optionally returns the previous
/// winner's physical expression ID.
///
/// If the group does not exist, returns a
/// [`MemoError::UnknownGroup`](crate::MemoError::UnknownGroup) error.
async fn update_group_winner(
&self,
group_id: Self::GroupId,
physical_expression_id: Self::PhysicalExpressionId,
) -> StorageResult<Option<Self::PhysicalExpressionId>>;

/// Adds a logical expression to an existing group via its [`Self::GroupId`].
///
/// If the group does not exist, returns a
/// [`MemoError::UnknownGroup`](crate::MemoError::UnknownGroup) error.
async fn add_logical_expression_to_group(
&self,
group_id: Self::GroupId,
logical_expression: Self::LogicalExpression,
) -> StorageResult<()>;

/// Adds a physical expression to an existing group via its [`Self::GroupId`].
///
/// If the group does not exist, returns a
/// [`MemoError::UnknownGroup`](crate::MemoError::UnknownGroup) error.
async fn add_physical_expression_to_group(
&self,
group_id: Self::GroupId,
physical_expression: Self::PhysicalExpression,
) -> StorageResult<()>;

/// Adds a new logical expression into the memo table, creating a new group if the expression
/// does not already exist.
///
/// The [`Self::LogicalExpression`] type should have some sort of mechanism for checking if
/// the expression has been seen before, and if it has already been created, then the parent
/// group ID should also be retrievable.
///
/// If the expression already exists, then this function will return the [`Self::GroupId`] of
/// the parent group and the corresponding (already existing) [`Self::LogicalExpressionId`].
///
/// If the expression does not exist, this function will create a new group and a new
/// expression, returning brand new IDs for both.
async fn add_logical_expression(
&self,
expression: Self::LogicalExpression,
) -> StorageResult<(Self::GroupId, Self::LogicalExpressionId)>;
}
4 changes: 4 additions & 0 deletions optd-persistent/src/memo/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
mod expression;

pub mod interface;
pub mod orm;
Loading

0 comments on commit 9b74ff7

Please sign in to comment.