Skip to content

Commit

Permalink
Merge pull request #251 from pmcgleenon/datafusion-43
Browse files Browse the repository at this point in the history
updated for datafusion release 43.0.0
  • Loading branch information
rschu1ze authored Nov 15, 2024
2 parents e452fdf + 5abcf2a commit be71464
Show file tree
Hide file tree
Showing 7 changed files with 97 additions and 95 deletions.
2 changes: 1 addition & 1 deletion datafusion/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ The benchmark should be completed in under an hour. On-demand pricing is $0.6 pe
1. `cd ClickBench/datafusion`
1. `vi benchmark.sh` and modify the following line to target the desired DataFusion version
```
git checkout 40.0.0
git checkout 43.0.0
```
1. `bash benchmark.sh`
Expand Down
2 changes: 1 addition & 1 deletion datafusion/benchmark.sh
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ sudo yum install gcc -y
# Install DataFusion (clone the repository, then check out the version pinned below)
git clone https://github.com/apache/arrow-datafusion.git
cd arrow-datafusion/datafusion-cli
git checkout 40.0.0
git checkout 43.0.0
CARGO_PROFILE_RELEASE_LTO=true RUSTFLAGS="-C codegen-units=1" cargo build --release
export PATH="`pwd`/target/release:$PATH"
cd ../..
Expand Down
3 changes: 2 additions & 1 deletion datafusion/create_partitioned.sql
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
CREATE EXTERNAL TABLE hits
STORED AS PARQUET
LOCATION 'partitioned';
LOCATION 'partitioned'
OPTIONS ('binary_as_string' 'true');
3 changes: 2 additions & 1 deletion datafusion/create_single.sql
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
CREATE EXTERNAL TABLE hits
STORED AS PARQUET
LOCATION 'hits.parquet';
LOCATION 'hits.parquet'
OPTIONS ('binary_as_string' 'true');
90 changes: 45 additions & 45 deletions datafusion/results/partitioned.json
Original file line number Diff line number Diff line change
@@ -1,58 +1,58 @@
{
"system": "DataFusion (Parquet, partitioned)",
"date": "2024-07-27",
"date": "2024-11-15",
"machine": "c6a.4xlarge, 500gb gp2",
"cluster_size": 1,
"comment": "v40.0.0 (4cae813)",
"comment": "v43.0.0 (88f58bf)",

"tags": ["Rust", "column-oriented", "embedded", "stateless"],

"load_time": 0,
"data_size": 14779976446,

"result": [
[0.043, 0.018, 0.016],
[0.087, 0.031, 0.028],
[0.173, 0.072, 0.073],
[0.356, 0.075, 0.081],
[1.201, 0.784, 0.796],
[0.960, 0.831, 0.837],
[0.057, 0.026, 0.026],
[0.062, 0.029, 0.031],
[1.408, 1.314, 1.315],
[1.302, 1.025, 1.038],
[0.483, 0.280, 0.269],
[0.705, 0.306, 0.296],
[1.137, 0.931, 0.939],
[3.183, 2.245, 2.252],
[1.499, 1.415, 1.429],
[1.011, 0.901, 0.897],
[3.230, 2.670, 2.655],
[3.136, 2.560, 2.539],
[6.849, 5.608, 5.827],
[0.299, 0.075, 0.068],
[10.086, 1.544, 1.617],
[11.238, 1.821, 1.835],
[21.957, 4.104, 4.132],
[55.510, 10.615, 10.548],
[2.678, 0.503, 0.500],
[0.765, 0.412, 0.413],
[2.649, 0.574, 0.559],
[9.652, 2.177, 2.203],
[8.528, 5.051, 5.019],
[0.499, 0.421, 0.439],
[2.389, 1.018, 1.028],
[6.060, 1.520, 1.513],
[8.820, 8.081, 7.826],
[10.604, 4.851, 5.088],
[10.567, 4.971, 4.880],
[1.737, 1.659, 1.649],
[0.363, 0.247, 0.231],
[0.156, 0.093, 0.092],
[0.198, 0.125, 0.124],
[0.902, 0.701, 0.683],
[0.144, 0.042, 0.041],
[0.130, 0.037, 0.040],
[0.131, 0.055, 0.050]
[0.051, 0.019, 0.019],
[0.091, 0.035, 0.035],
[0.189, 0.085, 0.088],
[0.383, 0.081, 0.077],
[1.071, 0.811, 0.803],
[0.944, 0.801, 0.805],
[0.078, 0.030, 0.030],
[0.103, 0.037, 0.037],
[1.313, 1.205, 1.201],
[1.357, 1.034, 1.025],
[0.511, 0.255, 0.253],
[0.634, 0.295, 0.301],
[1.016, 0.856, 0.879],
[2.615, 1.421, 1.374],
[1.131, 0.931, 0.918],
[1.051, 0.952, 0.958],
[2.672, 2.031, 2.066],
[2.592, 1.879, 1.887],
[5.549, 4.226, 4.335],
[0.254, 0.078, 0.075],
[9.967, 1.098, 1.092],
[11.248, 1.329, 1.327],
[21.868, 2.820, 2.818],
[55.458, 10.286, 10.609],
[2.678, 0.488, 0.486],
[0.802, 0.352, 0.354],
[2.672, 0.507, 0.498],
[9.614, 1.513, 1.507],
[8.368, 3.394, 3.521],
[0.535, 0.418, 0.439],
[2.362, 0.854, 0.861],
[5.957, 0.910, 0.914],
[4.780, 3.806, 3.871],
[10.168, 3.654, 3.586],
[10.090, 3.645, 3.546],
[1.775, 1.644, 1.660],
[0.364, 0.199, 0.183],
[0.183, 0.078, 0.075],
[0.290, 0.128, 0.123],
[0.619, 0.376, 0.376],
[0.148, 0.053, 0.044],
[0.142, 0.042, 0.042],
[0.155, 0.065, 0.053]
]
}
90 changes: 45 additions & 45 deletions datafusion/results/single.json
Original file line number Diff line number Diff line change
@@ -1,58 +1,58 @@
{
"system": "DataFusion (Parquet, single)",
"date": "2024-07-27",
"date": "2024-11-15",
"machine": "c6a.4xlarge, 500gb gp2",
"cluster_size": 1,
"comment": "v40.0.0 (4cae813)",
"comment": "v43.0.0 (88f58bf)",

"tags": ["Rust", "column-oriented", "embedded", "stateless"],

"load_time": 0,
"data_size": 14779976446,

"result": [
[0.076, 0.051, 0.055],
[0.113, 0.066, 0.066],
[0.196, 0.115, 0.105],
[0.340, 0.114, 0.115],
[1.074, 0.862, 0.858],
[0.995, 0.874, 0.909],
[0.088, 0.076, 0.065],
[0.102, 0.078, 0.068],
[1.442, 1.349, 1.368],
[1.260, 1.083, 1.064],
[0.451, 0.306, 0.304],
[0.597, 0.337, 0.335],
[1.088, 0.986, 0.974],
[3.085, 2.261, 2.268],
[1.522, 1.428, 1.429],
[1.068, 0.957, 0.960],
[3.217, 2.702, 2.754],
[3.149, 2.621, 2.564],
[6.978, 5.679, 5.865],
[0.338, 0.107, 0.113],
[9.885, 1.466, 1.474],
[11.225, 1.794, 1.791],
[22.035, 3.906, 3.912],
[55.923, 10.899, 10.975],
[2.560, 0.579, 0.575],
[0.754, 0.509, 0.506],
[2.517, 0.674, 0.651],
[9.574, 2.220, 2.216],
[9.070, 4.926, 4.940],
[0.536, 0.473, 0.481],
[2.288, 1.090, 1.101],
[5.823, 1.543, 1.528],
[8.637, 8.328, 7.848],
[10.477, 4.972, 5.022],
[10.435, 4.910, 5.020],
[1.827, 1.685, 1.724],
[0.389, 0.275, 0.270],
[0.201, 0.175, 0.160],
[0.230, 0.173, 0.172],
[0.887, 0.749, 0.755],
[0.172, 0.085, 0.076],
[0.165, 0.075, 0.073],
[0.160, 0.090, 0.100]
[0.093, 0.055, 0.056],
[0.138, 0.070, 0.070],
[0.206, 0.120, 0.117],
[0.346, 0.118, 0.114],
[0.979, 0.867, 0.871],
[1.030, 0.902, 0.904],
[0.125, 0.064, 0.077],
[0.143, 0.083, 0.078],
[1.304, 1.169, 1.240],
[1.533, 1.104, 1.100],
[0.475, 0.272, 0.278],
[0.562, 0.309, 0.315],
[1.165, 0.931, 0.965],
[2.643, 1.402, 1.490],
[1.143, 0.997, 0.983],
[1.106, 0.991, 0.993],
[2.727, 2.161, 2.098],
[2.578, 1.954, 1.947],
[5.530, 4.311, 4.253],
[0.319, 0.105, 0.107],
[9.732, 1.155, 1.149],
[11.337, 1.468, 1.407],
[22.055, 3.678, 3.663],
[55.942, 10.017, 10.014],
[2.561, 0.557, 0.577],
[0.809, 0.510, 0.519],
[2.579, 0.634, 0.620],
[9.630, 1.618, 1.655],
[8.645, 3.565, 3.699],
[0.584, 0.493, 0.485],
[2.285, 0.978, 0.991],
[5.690, 1.046, 1.006],
[4.468, 3.833, 3.885],
[10.123, 3.663, 3.654],
[10.114, 3.672, 3.685],
[1.743, 1.597, 1.659],
[0.389, 0.242, 0.230],
[0.266, 0.155, 0.170],
[0.369, 0.161, 0.180],
[0.659, 0.446, 0.416],
[0.190, 0.084, 0.085],
[0.177, 0.078, 0.079],
[0.164, 0.103, 0.088]
]
}
2 changes: 1 addition & 1 deletion datafusion/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ cat queries.sql | while read query; do
# 2. each query's output contains an "Elapsed xxx seconds" line, we just grep these 2 lines
# 3. use sed to take the second line
# 4. use awk to take the number we want
RES=`datafusion-cli -f $CREATE_SQL_FILE /tmp/query.sql 2>&1 | grep "Elapsed" |sed -n 2p | awk '{ print $2 }'
RES=`datafusion-cli -f $CREATE_SQL_FILE /tmp/query.sql 2>&1 | grep "Elapsed" |sed -n 2p | awk '{ print $2 }'`
[[ $RES != "" ]] && \
echo -n "$RES" || \
echo -n "null"
Expand Down

0 comments on commit be71464

Please sign in to comment.