Skip to content

Commit

Permalink
Merge remote-tracking branch 'apache/main' into alamb/document_analysis
Browse files: browse the repository at this point in the history
  • Loading branch information
alamb committed Dec 8, 2023
2 parents 8ff8300 + 34b0445 commit 65bc287
Show file tree
Hide file tree
Showing 172 changed files with 6,970 additions and 3,186 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/dev.yml
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ jobs:
- name: Checkout
uses: actions/checkout@v4
- name: Setup Python
uses: actions/setup-python@v4
uses: actions/setup-python@v5
with:
python-version: "3.10"
- name: Audit licenses
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/dev_pr.yml
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ jobs:
github.event_name == 'pull_request_target' &&
(github.event.action == 'opened' ||
github.event.action == 'synchronize')
uses: actions/labeler@v4.3.0
uses: actions/labeler@v5.0.0
with:
repo-token: ${{ secrets.GITHUB_TOKEN }}
configuration-path: .github/workflows/dev_pr/labeler.yml
Expand Down
34 changes: 18 additions & 16 deletions .github/workflows/dev_pr/labeler.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,35 +16,37 @@
# under the License.

development-process:
- dev/**.*
- .github/**.*
- ci/**.*
- .asf.yaml
- changed-files:
- any-glob-to-any-file: ['dev/**.*', '.github/**.*', 'ci/**.*', '.asf.yaml']

documentation:
- docs/**.*
- README.md
- ./**/README.md
- DEVELOPERS.md
- datafusion/docs/**.*
- changed-files:
- any-glob-to-any-file: ['docs/**.*', 'README.md', './**/README.md', 'DEVELOPERS.md', 'datafusion/docs/**.*']

sql:
- datafusion/sql/**/*
- changed-files:
- any-glob-to-any-file: ['datafusion/sql/**/*']

logical-expr:
- datafusion/expr/**/*
- changed-files:
- any-glob-to-any-file: ['datafusion/expr/**/*']

physical-expr:
- datafusion/physical-expr/**/*
- changed-files:
- any-glob-to-any-file: ['datafusion/physical-expr/**/*']

optimizer:
- datafusion/optimizer/**/*
- changed-files:
- any-glob-to-any-file: ['datafusion/optimizer/**/*']

core:
- datafusion/core/**/*
- changed-files:
- any-glob-to-any-file: ['datafusion/core/**/*']

substrait:
- datafusion/substrait/**/*
- changed-files:
- any-glob-to-any-file: ['datafusion/substrait/**/*']

sqllogictest:
- datafusion/sqllogictest/**/*
- changed-files:
- any-glob-to-any-file: ['datafusion/sqllogictest/**/*']
2 changes: 1 addition & 1 deletion .github/workflows/docs.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ jobs:
path: asf-site

- name: Setup Python
uses: actions/setup-python@v4
uses: actions/setup-python@v5
with:
python-version: "3.10"

Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/rust.yml
Original file line number Diff line number Diff line change
Expand Up @@ -348,7 +348,7 @@ jobs:
- uses: actions/checkout@v4
with:
submodules: true
- uses: actions/setup-python@v4
- uses: actions/setup-python@v5
with:
python-version: "3.8"
- name: Install PyArrow
Expand Down
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ parquet = { version = "49.0.0", default-features = false, features = ["arrow", "
rand = "0.8"
rstest = "0.18.0"
serde_json = "1"
sqlparser = { version = "0.39.0", features = ["visitor"] }
sqlparser = { version = "0.40.0", features = ["visitor"] }
tempfile = "3"
thiserror = "1.0.44"
chrono = { version = "0.4.31", default-features = false }
Expand Down
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ in-memory format. [Python Bindings](https://github.com/apache/arrow-datafusion-p
Here are links to some important information

- [Project Site](https://arrow.apache.org/datafusion)
- [Installation](https://arrow.apache.org/datafusion/user-guide/cli.html#installation)
- [Rust Getting Started](https://arrow.apache.org/datafusion/user-guide/example-usage.html)
- [Rust DataFrame API](https://arrow.apache.org/datafusion/user-guide/dataframe.html)
- [Rust API docs](https://docs.rs/datafusion/latest/datafusion)
Expand Down
39 changes: 34 additions & 5 deletions benchmarks/compare.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,6 @@ def compare(
noise_threshold: float,
) -> None:
baseline = BenchmarkRun.load_from_file(baseline_path)

comparison = BenchmarkRun.load_from_file(comparison_path)

console = Console()
Expand All @@ -124,27 +123,57 @@ def compare(
table.add_column(comparison_header, justify="right", style="dim")
table.add_column("Change", justify="right", style="dim")

faster_count = 0
slower_count = 0
no_change_count = 0
total_baseline_time = 0
total_comparison_time = 0

for baseline_result, comparison_result in zip(baseline.queries, comparison.queries):
assert baseline_result.query == comparison_result.query

total_baseline_time += baseline_result.execution_time
total_comparison_time += comparison_result.execution_time

change = comparison_result.execution_time / baseline_result.execution_time

if (1.0 - noise_threshold) <= change <= (1.0 + noise_threshold):
change = "no change"
change_text = "no change"
no_change_count += 1
elif change < 1.0:
change = f"+{(1 / change):.2f}x faster"
change_text = f"+{(1 / change):.2f}x faster"
faster_count += 1
else:
change = f"{change:.2f}x slower"
change_text = f"{change:.2f}x slower"
slower_count += 1

table.add_row(
f"Q{baseline_result.query}",
f"{baseline_result.execution_time:.2f}ms",
f"{comparison_result.execution_time:.2f}ms",
change,
change_text,
)

console.print(table)

# Calculate averages
avg_baseline_time = total_baseline_time / len(baseline.queries)
avg_comparison_time = total_comparison_time / len(comparison.queries)

# Summary table
summary_table = Table(show_header=True, header_style="bold magenta")
summary_table.add_column("Benchmark Summary", justify="left", style="dim")
summary_table.add_column("", justify="right", style="dim")

summary_table.add_row(f"Total Time ({baseline_header})", f"{total_baseline_time:.2f}ms")
summary_table.add_row(f"Total Time ({comparison_header})", f"{total_comparison_time:.2f}ms")
summary_table.add_row(f"Average Time ({baseline_header})", f"{avg_baseline_time:.2f}ms")
summary_table.add_row(f"Average Time ({comparison_header})", f"{avg_comparison_time:.2f}ms")
summary_table.add_row("Queries Faster", str(faster_count))
summary_table.add_row("Queries Slower", str(slower_count))
summary_table.add_row("Queries with No Change", str(no_change_count))

console.print(summary_table)

def main() -> None:
parser = ArgumentParser()
Expand Down
Loading

0 comments on commit 65bc287

Please sign in to comment.