Skip to content
This repository has been archived by the owner on Jun 6, 2024. It is now read-only.

Commit

Permalink
import sql_dfpp
Browse files Browse the repository at this point in the history
  • Loading branch information
connortsui20 committed Feb 18, 2024
1 parent 184ec7e commit 7ed7c71
Show file tree
Hide file tree
Showing 4 changed files with 89 additions and 0 deletions.
12 changes: 12 additions & 0 deletions sql_dfphysicalplan/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
[package]
name = "sql_dfphysicalplan"
version = "0.1.0"
edition = "2021"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
arrow = { version = "50", features = ["prettyprint"] }
datafusion = "35"
datafusion-common = "35"
tokio = { version = "1", features = ["full"] }
21 changes: 21 additions & 0 deletions sql_dfphysicalplan/queries/TPC-H_Q1.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
SELECT
L_RETURNFLAG,
L_LINESTATUS,
SUM(L_QUANTITY) AS SUM_QTY,
SUM(L_EXTENDEDPRICE) AS SUM_BASE_PRICE,
SUM(L_EXTENDEDPRICE * (1 - L_DISCOUNT)) AS SUM_DISC_PRICE,
SUM(L_EXTENDEDPRICE * (1 - L_DISCOUNT) * (1 + L_TAX)) AS SUM_CHARGE,
AVG(L_QUANTITY) AS AVG_QTY,
AVG(L_EXTENDEDPRICE) AS AVG_PRICE,
AVG(L_DISCOUNT) AS AVG_DISC,
COUNT(*) AS COUNT_ORDER
FROM
LINEITEM
WHERE
L_SHIPDATE <= DATE '1998-12-01' - INTERVAL '93 DAY'
GROUP BY
L_RETURNFLAG,
L_LINESTATUS
ORDER BY
L_RETURNFLAG,
L_LINESTATUS;
13 changes: 13 additions & 0 deletions sql_dfphysicalplan/queries/test_query.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
SELECT
L_RETURNFLAG,
L_LINESTATUS,
SUM(L_QUANTITY) AS SUM_QTY,
AVG(L_QUANTITY) AS AVG_QTY,
COUNT(*) AS COUNT_ORDER
FROM
LINEITEM
-- WHERE
-- L_SHIPDATE <= DATE '1998-12-01' - INTERVAL '93 DAY'
GROUP BY
L_RETURNFLAG,
L_LINESTATUS
43 changes: 43 additions & 0 deletions sql_dfphysicalplan/src/main.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
use datafusion::execution::context::SessionContext;
use datafusion::prelude::*;
use datafusion_common::Result;
use std::io;

#[tokio::main]
async fn main() -> Result<()> {
let ctx = SessionContext::new();

let tables = [
"customer", "lineitem", "nation", "orders", "part", "partsupp", "region", "supplier",
];
for table_name in tables {
ctx.register_csv(
table_name,
&format!("data/{}.csv", table_name),
CsvReadOptions::new().delimiter(b'|'),
)
.await?;
}

let stdin = io::read_to_string(io::stdin())?;

let df = ctx
.sql(&stdin)
.await
.unwrap_or_else(|err| panic!("{:#?}", err));

let logical_plan = df.clone().into_unoptimized_plan();
println!("Logical Plan:\n{:#?}", logical_plan);

// let physical_plan = df.clone().create_physical_plan().await?;
// println!("Physical Plan:\n{:#?}", physical_plan);

let results: Vec<_> = df.collect().await?;

// format the results
let pretty_results = arrow::util::pretty::pretty_format_batches(&results)?.to_string();

println!("{}", pretty_results);

Ok(())
}

0 comments on commit 7ed7c71

Please sign in to comment.