Skip to content

Commit

Permalink
Try running a basic comparison benchmark between base and pr commit
Browse files Browse the repository at this point in the history
  • Loading branch information
gruuya committed Mar 5, 2024
1 parent ac27428 commit a43dea4
Show file tree
Hide file tree
Showing 3 changed files with 65 additions and 3 deletions.
57 changes: 57 additions & 0 deletions .github/workflows/pr_benchmarks.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
# Benchmarks a pull request against its base commit: runs the TPC-H benchmark
# through benchmarks/bench.sh on the PR checkout, then on the base commit
# (keeping the generated data and the PR results in the workspace), and
# finally asks bench.sh to compare the two result sets.
#
# Security note: branch names are attacker-controlled on pull requests, so no
# `github.*` value is expanded directly inside a `run:` script. Every such
# value is passed through `env:` and referenced as a quoted shell variable.
name: Run and Cache Benchmarks

on:
  pull_request:
    types: [labeled, opened, reopened, synchronize]

jobs:
  benchmark:
    name: Run Benchmarks
    runs-on: ubuntu-latest
    steps:
      # Debugging aid: prints the whole `github` context to the job log.
      - name: Dump GitHub context
        env:
          GITHUB_CONTEXT: ${{ toJSON(github) }}
        run: echo "$GITHUB_CONTEXT"

      - name: Checkout PR changes
        uses: actions/checkout@v4

      - name: Setup data and generate unique result names
        # Runs bash under `script`; `-e` makes `script` return the child's
        # exit status so failures still fail the step.
        # NOTE(review): presumably used so bench.sh sees a terminal — confirm.
        shell: 'script -q -e -c "bash --noprofile --norc -eo pipefail {0}"'
        env:
          HEAD_REF: ${{ github.head_ref }}
          HEAD_SHA: ${{ github.sha }}
          BASE_REF: ${{ github.base_ref }}
          BASE_SHA: ${{ github.event.pull_request.base.sha }}
        run: |
          cd benchmarks
          # -p: do not abort (the shell runs with -e) if the directory exists
          mkdir -p data
          # Setup the TPC-H data set with a scale factor of 10
          # NOTE(review): confirm the scale factor `bench.sh data tpch` uses
          ./bench.sh data tpch
          # Generate a unique-ish identifier for the results using
          # branch name and commit sha. `/` is replaced with `_`, the same
          # way bench.sh sanitises BRANCH_NAME, so the identifier is a single
          # path component.
          short_ref=$(printf '%s' "${HEAD_REF//\//_}" | cut -c1-20)
          short_sha=$(printf '%s' "$HEAD_SHA" | cut -c1-7)
          echo "HEAD_REF_SHA=$short_ref-$short_sha" >> "$GITHUB_ENV"
          short_sha=$(printf '%s' "$BASE_SHA" | cut -c1-7)
          echo "BASE_REF_SHA=${BASE_REF//\//_}-$short_sha" >> "$GITHUB_ENV"

      - name: Benchmark PR changes
        # HEAD_REF_SHA was exported through GITHUB_ENV by the previous step,
        # so it is available here as a regular environment variable.
        run: |
          cd benchmarks
          RESULTS_NAME="$HEAD_REF_SHA" ./bench.sh run tpch

      - name: Checkout base commit
        uses: actions/checkout@v4
        with:
          ref: ${{ github.event.pull_request.base.sha }}
          # Keep untracked files (generated data, PR benchmark results)
          clean: false

      - name: Benchmark baseline and compare
        run: |
          cd benchmarks
          RESULTS_NAME="$BASE_REF_SHA" ./bench.sh run tpch
          # NOTE(review): `rich` is presumably needed by the compare step
          pip3 install rich
          ./bench.sh compare "$BASE_REF_SHA" "$HEAD_REF_SHA"
8 changes: 5 additions & 3 deletions benchmarks/bench.sh
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ clickbench_extended: ClickBench "inspired" queries against a single parquet (
DATA_DIR directory to store datasets
CARGO_COMMAND command that runs the benchmark binary
DATAFUSION_DIR directory to use (default $DATAFUSION_DIR)
RESULTS_NAME folder where the benchmark files are stored
"
exit 1
}
Expand Down Expand Up @@ -166,18 +167,19 @@ main() {
esac
;;
run)
# Parse positional paraleters
# Parse positional parameters
BENCHMARK=${ARG2:-"${BENCHMARK}"}
BRANCH_NAME=$(cd ${DATAFUSION_DIR} && git rev-parse --abbrev-ref HEAD)
BRANCH_NAME=${BRANCH_NAME//\//_} # mind blowing syntax to replace / with _
RESULTS_DIR=${RESULTS_DIR:-"$SCRIPT_DIR/results/$BRANCH_NAME"}
RESULTS_NAME=${RESULTS_NAME:-"${BRANCH_NAME}"}
RESULTS_DIR=${RESULTS_DIR:-"$SCRIPT_DIR/results/$RESULTS_NAME"}

echo "***************************"
echo "DataFusion Benchmark Script"
echo "COMMAND: ${COMMAND}"
echo "BENCHMARK: ${BENCHMARK}"
echo "DATAFUSION_DIR: ${DATAFUSION_DIR}"
echo "BRACH_NAME: ${BRANCH_NAME}"
echo "BRANCH_NAME: ${BRANCH_NAME}"
echo "DATA_DIR: ${DATA_DIR}"
echo "RESULTS_DIR: ${RESULTS_DIR}"
echo "CARGO_COMMAND: ${CARGO_COMMAND}"
Expand Down
3 changes: 3 additions & 0 deletions datafusion/physical-plan/src/sorts/sort.rs
Original file line number Diff line number Diff line change
Expand Up @@ -980,6 +980,9 @@ impl ExecutionPlan for SortExec {
let batch = batch?;
sorter.insert_batch(batch).await?;
}
// Test whether benchmarks catch this
// TODO: remove before merge!
tokio::time::sleep(std::time::Duration::from_secs(1)).await;
sorter.sort()
})
.try_flatten(),
Expand Down

0 comments on commit a43dea4

Please sign in to comment.