From be89667eb8844c47716ed6ca311a484d136d4c9d Mon Sep 17 00:00:00 2001 From: Connor Tsui Date: Sat, 17 Feb 2024 22:13:13 -0500 Subject: [PATCH] remove substrait --- .gitignore | 2 + .../src/execution/substrait/deserialize.rs | 20 ------- eggstrain/src/execution/substrait/example.rs | 52 ------------------- eggstrain/src/execution/substrait/mod.rs | 3 -- .../execution/substrait/substrait_arrow.rs | 20 ------- 5 files changed, 2 insertions(+), 95 deletions(-) delete mode 100644 eggstrain/src/execution/substrait/deserialize.rs delete mode 100644 eggstrain/src/execution/substrait/example.rs delete mode 100644 eggstrain/src/execution/substrait/mod.rs delete mode 100644 eggstrain/src/execution/substrait/substrait_arrow.rs diff --git a/.gitignore b/.gitignore index 6985cf1..1c0f3e8 100644 --- a/.gitignore +++ b/.gitignore @@ -12,3 +12,5 @@ Cargo.lock # MSVC Windows builds of rustc generate these, which store debugging information *.pdb + +data/ diff --git a/eggstrain/src/execution/substrait/deserialize.rs b/eggstrain/src/execution/substrait/deserialize.rs deleted file mode 100644 index 5123102..0000000 --- a/eggstrain/src/execution/substrait/deserialize.rs +++ /dev/null @@ -1,20 +0,0 @@ -use std::fs; -use substrait::proto::{Plan, ReadRel}; - -pub fn read_str(path: &str) -> String { - fs::read_to_string(path).unwrap_or_else(|_| panic!("Unable to read file {}", path)) -} - -pub fn get_plan(path: &str) -> Plan { - let plan = serde_json::from_str::(&read_str(path)) - .unwrap_or_else(|_| panic!("Could not parse json {:?} into Plan", path)); - println!("{}", serde_json::to_string_pretty(&plan).unwrap()); - plan -} - -pub fn get_read(path: &str) -> ReadRel { - let read = serde_json::from_str::(&read_str(path)) - .unwrap_or_else(|_| panic!("Could not parse json {:?} into Plan", path)); - println!("{}", serde_json::to_string_pretty(&read).unwrap()); - read -} diff --git a/eggstrain/src/execution/substrait/example.rs b/eggstrain/src/execution/substrait/example.rs deleted file mode 100644 index 011e238..0000000 --- a/eggstrain/src/execution/substrait/example.rs +++ /dev/null @@ -1,52 +0,0 @@ -//! Want to be able to parse substrait into Arrow schema - -use arrow::datatypes::{DataType, Field, Schema}; -use substrait::proto::rel::RelType; -use substrait::proto::ReadRel; - -/// We want the plan to look like this: -/// -/// ```text -/// |-+ Aggregate({sales = sum(quantity_price)}, group_by=(product_name, product_id)) -/// |-+ InnerJoin(on=orders.product_id = products.product_id) -/// |- ReadTable(orders) -/// |-+ Filter(INDEX_IN("Computers", categories) IS NULL) -/// |- ReadTable(products) -/// ``` -pub fn tpch_query1() -> RelType { - // Go bottom up - let read = ReadRel { - common: None, - base_schema: todo!(), - filter: todo!(), - best_effort_filter: todo!(), - projection: todo!(), - advanced_extension: todo!(), - read_type: todo!(), - }; - - todo!() -} - -pub fn tpch_lineitem_schema() -> Schema { - let fields = vec![ - Field::new("L_ORDERKEY", DataType::UInt64, false), - Field::new("L_PARTKEY", DataType::UInt64, false), - Field::new("L_SUPPKEY", DataType::UInt64, false), - Field::new("L_LINENUMBER", DataType::Int64, false), - Field::new("L_QUANTITY", DataType::Decimal128(16, 16), false), - Field::new("L_EXTENDEDPRICE", DataType::Decimal128(16, 16), false), - Field::new("L_DISCOUNT", DataType::Decimal128(16, 16), false), - Field::new("L_TAX", DataType::Decimal128(16, 16), false), - Field::new("L_RETURNFLAG", DataType::Boolean, false), - Field::new("L_LINESTATUS", DataType::Boolean, false), - Field::new("L_SHIPDATE", DataType::Date32, false), - Field::new("L_COMMITDATE", DataType::Date32, false), - Field::new("L_RECEIPTDATE", DataType::Date32, false), - Field::new("L_SHIPINSTRUCT", DataType::FixedSizeBinary(25), false), - Field::new("L_SHIPMODE", DataType::FixedSizeBinary(10), false), - Field::new("L_COMMENT", DataType::FixedSizeBinary(44), false), - ]; - - Schema::new(fields) -} diff --git a/eggstrain/src/execution/substrait/mod.rs b/eggstrain/src/execution/substrait/mod.rs deleted file mode 100644 index 0fb207d..0000000 --- a/eggstrain/src/execution/substrait/mod.rs +++ /dev/null @@ -1,3 +0,0 @@ -pub mod deserialize; -pub mod example; -pub mod substrait_arrow; diff --git a/eggstrain/src/execution/substrait/substrait_arrow.rs b/eggstrain/src/execution/substrait/substrait_arrow.rs deleted file mode 100644 index 42b7c20..0000000 --- a/eggstrain/src/execution/substrait/substrait_arrow.rs +++ /dev/null @@ -1,20 +0,0 @@ -use arrow::datatypes::{DataType, Schema}; -use std::sync::Arc; -use substrait::proto::{Type, NamedStruct}; - -/// https://docs.rs/substrait/latest/substrait/proto/struct.NamedStruct.html -/// https://docs.rs/substrait/latest/substrait/proto/type/struct.Struct.html -pub fn schema_translate(substrait_schema: NamedStruct) -> Arc { - for (i, name) in substrait_schema.names.iter().enumerate() { - todo!() - } - - todo!() -} - -/// https://docs.rs/substrait/latest/substrait/proto/struct.Type.html -/// https://docs.rs/arrow/latest/arrow/datatypes/enum.DataType.html -pub fn type_translate(typ: Type) -> DataType { - todo!() -} -