diff --git a/eggstrain/Cargo.toml b/eggstrain/Cargo.toml index 84dfb86..74e28eb 100644 --- a/eggstrain/Cargo.toml +++ b/eggstrain/Cargo.toml @@ -15,7 +15,8 @@ anyhow = "1" arrow = "50" async-trait = "0.1" datafusion = "35" -substrait = "0.24" +serde_json = "1" +substrait = { version = "0.24", features = ["serde"] } tokio = { version = "1", features = ["full"] } tokio-stream = "0.1" rayon = "1" diff --git a/eggstrain/src/execution/substrait/deserialize.rs b/eggstrain/src/execution/substrait/deserialize.rs new file mode 100644 index 0000000..bfb6321 --- /dev/null +++ b/eggstrain/src/execution/substrait/deserialize.rs @@ -0,0 +1,13 @@ +use std::fs; +use substrait::proto::Plan; + +pub fn read_str(path: &str) -> String { + fs::read_to_string(path).unwrap_or_else(|_| panic!("Unable to read file {}", path)) +} + +pub fn get_json(path: &str) -> Plan { + let plan = serde_json::from_str::(&read_str(path)) + .unwrap_or_else(|_| panic!("Could not parse json {:?} into Plan", path)); + println!("{}", serde_json::to_string_pretty(&plan).unwrap()); + plan +} diff --git a/eggstrain/src/execution/substrait/example.rs b/eggstrain/src/execution/substrait/example.rs index f471c6b..011e238 100644 --- a/eggstrain/src/execution/substrait/example.rs +++ b/eggstrain/src/execution/substrait/example.rs @@ -6,7 +6,7 @@ use substrait::proto::ReadRel; /// We want the plan to look like this: /// -/// ``` +/// ```text /// |-+ Aggregate({sales = sum(quantity_price)}, group_by=(product_name, product_id)) /// |-+ InnerJoin(on=orders.product_id = products.product_id) /// |- ReadTable(orders) diff --git a/eggstrain/src/execution/substrait/mod.rs b/eggstrain/src/execution/substrait/mod.rs index d4d8a94..58dccc1 100644 --- a/eggstrain/src/execution/substrait/mod.rs +++ b/eggstrain/src/execution/substrait/mod.rs @@ -1 +1,2 @@ pub mod example; +pub mod deserialize; diff --git a/eggstrain/src/main.rs b/eggstrain/src/main.rs index 3e0d290..746ad08 100644 --- a/eggstrain/src/main.rs +++ b/eggstrain/src/main.rs @@ -2,7 +2,11 @@ pub mod execution; pub mod scheduler_client; pub mod storage_client; +use execution::substrait::deserialize::get_json; + + #[tokio::main] async fn main() { println!("Hello, world!"); + get_json("substrait_plan_example.json"); } diff --git a/eggstrain/substrait_plan_example.json b/eggstrain/substrait_plan_example.json new file mode 100644 index 0000000..c99a3cb --- /dev/null +++ b/eggstrain/substrait_plan_example.json @@ -0,0 +1,184 @@ +{ + "extensions": [ + { + "extensionFunction": { + "functionAnchor": 1, + "name": "lte" + } + }, + { + "extensionFunction": { + "functionAnchor": 2, + "name": "is_not_null" + } + }, + { + "extensionFunction": { + "functionAnchor": 3, + "name": "and" + } + }, + { + "extensionFunction": { + "functionAnchor": 4, + "name": "count" + } + } + ], + "relations": [ + { + "root": { + "input": { + "project": { + "input": { + "aggregate": { + "input": { + "read": { + "baseSchema": { + "names": [ + "exercise", + "difficulty_level" + ], + "struct": { + "types": [ + { + "varchar": { + "length": 13, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "i32": { + "nullability": "NULLABILITY_NULLABLE" + } + } + ], + "nullability": "NULLABILITY_REQUIRED" + } + }, + "filter": { + "scalarFunction": { + "functionReference": 3, + "outputType": { + "bool": { + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [ + { + "value": { + "scalarFunction": { + "functionReference": 1, + "outputType": { + "i32": { + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [ + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 1 + } + }, + "rootReference": {} + } + } + }, + { + "value": { + "literal": { + "i32": 5 + } + } + } + ] + } + } + }, + { + "value": { + "scalarFunction": { + "functionReference": 2, + "outputType": { + "i32": { + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [ + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 1 + } + }, + "rootReference": {} + } + } + } + ] + } + } + } + ] + } + }, + "projection": { + "select": { + "structItems": [ + {} + ] + }, + "maintainSingularStruct": true + }, + "namedTable": { + "names": [ + "crossfit" + ] + } + } + }, + "groupings": [ + {} + ], + "measures": [ + { + "measure": { + "functionReference": 4, + "outputType": { + "i64": { + "nullability": "NULLABILITY_NULLABLE" + } + } + } + } + ] + } + }, + "expressions": [ + { + "selection": { + "directReference": { + "structField": {} + }, + "rootReference": {} + } + } + ] + } + }, + "names": [ + "exercise" + ] + } + } + ], + "version": { + "minorNumber": 24, + "producer": "DuckDB" + } +} \ No newline at end of file