Skip to content

Support vectorized append and compare for multi group by #34532

Support vectorized append and compare for multi group by

Support vectorized append and compare for multi group by #34532

Workflow file for this run

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# Workflow name as displayed in the GitHub Actions UI.
name: Rust

# A new run cancels any in-progress run that shares the same group key
# (repository + PR head ref or commit SHA + workflow name).
concurrency:
  group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }}
  cancel-in-progress: true

on:
  # Pushes and pull requests are filtered with the same ignore list:
  # docs, markdown files and the issue / pull-request templates.
  push:
    paths-ignore:
      - 'docs/**'
      - '**.md'
      - '.github/ISSUE_TEMPLATE/**'
      - '.github/pull_request_template.md'
  pull_request:
    paths-ignore:
      - 'docs/**'
      - '**.md'
      - '.github/ISSUE_TEMPLATE/**'
      - '.github/pull_request_template.md'
  # manual trigger
  # https://docs.github.com/en/actions/managing-workflow-runs/manually-running-a-workflow
  workflow_dispatch:

jobs:
# Compile-only gate: `cargo check` across the workspace and several
# feature combinations. No tests are executed by this job.
  linux-build-lib:
    name: cargo check
    runs-on: ubuntu-latest
    container:
      image: amd64/rust
    steps:
      - uses: actions/checkout@v4
      - name: Setup Rust toolchain
        uses: ./.github/actions/setup-builder
        with:
          rust-version: stable
      # Cache is keyed on the hash of every Cargo.toml / Cargo.lock in the repo.
      - name: Cache Cargo
        uses: actions/cache@v4
        with:
          path: |
            ~/.cargo/bin/
            ~/.cargo/registry/index/
            ~/.cargo/registry/cache/
            ~/.cargo/git/db/
            ./target/
            ./datafusion-cli/target/
          key: cargo-cache-${{ hashFiles('**/Cargo.toml', '**/Cargo.lock') }}
      - name: Check datafusion without default features
        # Some of the test binaries require the parquet feature still
        #run: cargo check --all-targets --no-default-features -p datafusion
        run: cargo check --no-default-features -p datafusion
      - name: Check datafusion-common without default features
        run: cargo check --all-targets --no-default-features -p datafusion-common
      - name: Check datafusion-functions
        run: cargo check --all-targets --no-default-features -p datafusion-functions
      - name: Check workspace in debug mode
        run: cargo check --all-targets --workspace
      - name: Check workspace with avro,json features
        run: cargo check --workspace --benches --features avro,json
      - name: Check Cargo.lock for datafusion-cli
        run: |
          # If this test fails, try running `cargo update` in the `datafusion-cli` directory
          # and check in the updated Cargo.lock file.
          cargo check --manifest-path datafusion-cli/Cargo.toml --locked

      # Ensure that the datafusion crate can be built with only a subset of the function
      # packages enabled.
      - name: Check datafusion (nested_expressions)
        run: cargo check --no-default-features --features=nested_expressions -p datafusion
      - name: Check datafusion (crypto)
        run: cargo check --no-default-features --features=crypto_expressions -p datafusion
      - name: Check datafusion (datetime_expressions)
        run: cargo check --no-default-features --features=datetime_expressions -p datafusion
      - name: Check datafusion (encoding_expressions)
        run: cargo check --no-default-features --features=encoding_expressions -p datafusion
      - name: Check datafusion (math_expressions)
        run: cargo check --no-default-features --features=math_expressions -p datafusion
      - name: Check datafusion (regex_expressions)
        run: cargo check --no-default-features --features=regex_expressions -p datafusion
      - name: Check datafusion (string_expressions)
        run: cargo check --no-default-features --features=string_expressions -p datafusion

      # Ensure that the datafusion-functions crate can be built with only a subset of the function
      # packages enabled.
      - name: Check datafusion-functions (crypto)
        run: cargo check --all-targets --no-default-features --features=crypto_expressions -p datafusion-functions
      - name: Check datafusion-functions (datetime_expressions)
        run: cargo check --all-targets --no-default-features --features=datetime_expressions -p datafusion-functions
      - name: Check datafusion-functions (encoding_expressions)
        run: cargo check --all-targets --no-default-features --features=encoding_expressions -p datafusion-functions
      - name: Check datafusion-functions (math_expressions)
        run: cargo check --all-targets --no-default-features --features=math_expressions -p datafusion-functions
      - name: Check datafusion-functions (regex_expressions)
        run: cargo check --all-targets --no-default-features --features=regex_expressions -p datafusion-functions
      - name: Check datafusion-functions (string_expressions)
        run: cargo check --all-targets --no-default-features --features=string_expressions -p datafusion-functions
# Linux test run: `--lib --tests --bins` targets with the avro, json and
# backtrace features; doctests are not part of this job.
  linux-test:
    name: cargo test (amd64)
    needs:
      - linux-build-lib
    runs-on: ubuntu-latest
    container:
      image: amd64/rust
    steps:
      - uses: actions/checkout@v4
        with:
          submodules: true
      - name: Setup Rust toolchain
        uses: ./.github/actions/setup-builder
        with:
          rust-version: stable
      - name: Run tests (excluding doctests)
        run: cargo test --lib --tests --bins --features avro,json,backtrace
      # `git diff --exit-code` exits non-zero when tracked files were modified.
      - name: Verify Working Directory Clean
        run: git diff --exit-code
# Test run for the datafusion-cli directory with all of its features enabled.
  linux-test-datafusion-cli:
    name: cargo test datafusion-cli (amd64)
    needs:
      - linux-build-lib
    runs-on: ubuntu-latest
    container:
      image: amd64/rust
    steps:
      - uses: actions/checkout@v4
        with:
          submodules: true
      - name: Setup Rust toolchain
        uses: ./.github/actions/setup-builder
        with:
          rust-version: stable
      - name: Run tests (excluding doctests)
        run: |
          cd datafusion-cli
          cargo test --lib --tests --bins --all-features
      # `git diff --exit-code` exits non-zero when tracked files were modified.
      - name: Verify Working Directory Clean
        run: git diff --exit-code
# Runs the `sql` example and then the ci/scripts/rust_example.sh script.
  linux-test-example:
    name: cargo examples (amd64)
    needs:
      - linux-build-lib
    runs-on: ubuntu-latest
    container:
      image: amd64/rust
    steps:
      - uses: actions/checkout@v4
        with:
          submodules: true
      - name: Setup Rust toolchain
        uses: ./.github/actions/setup-builder
        with:
          rust-version: stable
      - name: Run examples
        run: |
          # test datafusion-sql examples
          cargo run --example sql
          # test datafusion-examples
          ci/scripts/rust_example.sh
      # `git diff --exit-code` exits non-zero when tracked files were modified.
      - name: Verify Working Directory Clean
        run: git diff --exit-code
# Documentation examples: `cargo test --doc` at the repository root and
# again inside datafusion-cli.
  linux-test-doc:
    name: cargo test doc (amd64)
    needs:
      - linux-build-lib
    runs-on: ubuntu-latest
    container:
      image: amd64/rust
    steps:
      - uses: actions/checkout@v4
        with:
          submodules: true
      - name: Setup Rust toolchain
        uses: ./.github/actions/setup-builder
        with:
          rust-version: stable
      - name: Run doctests
        run: |
          cargo test --doc --features avro,json
          cd datafusion-cli
          cargo test --doc --all-features
      # `git diff --exit-code` exits non-zero when tracked files were modified.
      - name: Verify Working Directory Clean
        run: git diff --exit-code
# Rustdoc cleanliness check, delegated to ci/scripts/rust_docs.sh.
  linux-rustdoc:
    name: cargo doc
    needs:
      - linux-build-lib
    runs-on: ubuntu-latest
    container:
      image: amd64/rust
    steps:
      - uses: actions/checkout@v4
      - name: Setup Rust toolchain
        uses: ./.github/actions/setup-builder
        with:
          rust-version: stable
      - name: Run cargo doc
        run: ci/scripts/rust_docs.sh
# Builds ./datafusion/wasmtest with wasm-pack in dev mode.
  linux-wasm-pack:
    name: build with wasm-pack
    runs-on: ubuntu-latest
    container:
      image: amd64/rust
    steps:
      - uses: actions/checkout@v4
      - name: Setup Rust toolchain
        uses: ./.github/actions/setup-builder
        with:
          rust-version: stable
      - name: Install wasm-pack
        # Request bash explicitly: GitHub then runs the script with
        # `-eo pipefail`, so a failed `curl` fails this step. With the default
        # shell only the exit status of the trailing `sh` is checked, and an
        # empty pipe makes `sh` succeed.
        # NOTE(review): assumes bash is present in the amd64/rust image — confirm.
        shell: bash
        run: curl https://rustwasm.github.io/wasm-pack/installer/init.sh -sSf | sh
      - name: Build with wasm-pack
        working-directory: ./datafusion/wasmtest
        run: wasm-pack build --dev
# Verify that the benchmark queries return the correct results.
# TPC-H data is generated with dbgen (-s 0.1) into the sqllogictest data
# directory, then the plan tests and the TPC-H sqllogictests run.
  verify-benchmark-results:
    name: verify benchmark results (amd64)
    needs: [ linux-build-lib ]
    runs-on: ubuntu-latest
    container:
      image: amd64/rust
    steps:
      - uses: actions/checkout@v4
        with:
          submodules: true
      - name: Setup Rust toolchain
        uses: ./.github/actions/setup-builder
        with:
          rust-version: stable
      - name: Generate benchmark data and expected query results
        run: |
          mkdir -p datafusion/sqllogictest/test_files/tpch/data
          git clone https://github.com/databricks/tpch-dbgen.git
          cd tpch-dbgen
          make
          ./dbgen -f -s 0.1
          mv *.tbl ../datafusion/sqllogictest/test_files/tpch/data
      - name: Verify that benchmark queries return expected results
        run: |
          # Assign first, export second: `export VAR=$(cmd)` reports the status
          # of `export`, which would hide a failing `realpath`.
          TPCH_DATA="$(realpath datafusion/sqllogictest/test_files/tpch/data)"
          export TPCH_DATA
          # use release build for plan verification because debug build causes stack overflow
          cargo test plan_q --package datafusion-benchmarks --profile release-nonlto --features=ci -- --test-threads=1
          INCLUDE_TPCH=true cargo test --test sqllogictests
      # `git diff --exit-code` exits non-zero when tracked files were modified.
      - name: Verify Working Directory Clean
        run: git diff --exit-code
# sqllogictests executed against a postgres:15 service container.
  sqllogictest-postgres:
    name: 'Run sqllogictest with Postgres runner'
    needs:
      - linux-build-lib
    runs-on: ubuntu-latest
    services:
      postgres:
        image: postgres:15
        env:
          POSTGRES_PASSWORD: postgres
          POSTGRES_DB: db_test
          POSTGRES_INITDB_ARGS: '--encoding=UTF-8 --lc-collate=C --lc-ctype=C'
        # Only the container port is listed; the host port assigned at
        # runtime is read back through `job.services.postgres.ports` below.
        ports:
          - 5432/tcp
        options: >-
          --health-cmd pg_isready
          --health-interval 10s
          --health-timeout 5s
          --health-retries 5
    steps:
      - uses: actions/checkout@v4
        with:
          submodules: true
      - name: Setup toolchain
        run: |
          rustup toolchain install stable
          rustup default stable
      - name: Run sqllogictest
        env:
          POSTGRES_PORT: ${{ job.services.postgres.ports[5432] }}
        run: PG_COMPAT=true PG_URI="postgresql://postgres:postgres@localhost:$POSTGRES_PORT/db_test" cargo test --features=postgres --test sqllogictests
# Windows test run: root tests first, then the datafusion-cli tests.
  windows:
    name: cargo test (win64)
    runs-on: windows-latest
    steps:
      - uses: actions/checkout@v4
        with:
          submodules: true
      - name: Setup Rust toolchain
        uses: ./.github/actions/setup-windows-builder
      - name: Run tests (excluding doctests)
        shell: bash
        # PATH is extended with $HOME/d/protoc/bin before cargo runs.
        run: |
          export PATH=$PATH:$HOME/d/protoc/bin
          cargo test --lib --tests --bins --features avro,json,backtrace
          cd datafusion-cli
          cargo test --lib --tests --bins --all-features
# macOS test run (macos-latest): root tests first, then datafusion-cli tests.
  macos:
    name: cargo test (macos)
    runs-on: macos-latest
    steps:
      - uses: actions/checkout@v4
        with:
          submodules: true
      - name: Setup Rust toolchain
        uses: ./.github/actions/setup-macos-builder
      - name: Run tests (excluding doctests)
        shell: bash
        run: |
          cargo test --lib --tests --bins --features avro,json,backtrace
          cd datafusion-cli
          cargo test --lib --tests --bins --all-features
# macOS aarch64 test run on the macos-14 runner, using its own builder action.
  macos-aarch64:
    name: cargo test (macos-aarch64)
    runs-on: macos-14
    steps:
      - uses: actions/checkout@v4
        with:
          submodules: true
      - name: Setup Rust toolchain
        uses: ./.github/actions/setup-macos-aarch64-builder
      - name: Run tests (excluding doctests)
        shell: bash
        run: |
          cargo test --lib --tests --bins --features avro,json,backtrace
          cd datafusion-cli
          cargo test --lib --tests --bins --all-features
# datafusion-common tests with the `pyarrow` feature, against a pip-installed PyArrow.
  test-datafusion-pyarrow:
    name: cargo test pyarrow (amd64)
    needs:
      - linux-build-lib
    # NOTE(review): GitHub has retired the ubuntu-20.04 hosted runner image —
    # confirm this label still resolves to a runner.
    runs-on: ubuntu-20.04
    container:
      # Workaround https://github.com/actions/setup-python/issues/721
      image: amd64/rust:bullseye
    steps:
      - uses: actions/checkout@v4
        with:
          submodules: true
      - uses: actions/setup-python@v5
        with:
          python-version: '3.8'
      - name: Install PyArrow
        # LIBRARY_PATH is exported to later steps with the current value of
        # LD_LIBRARY_PATH.
        run: |
          echo "LIBRARY_PATH=$LD_LIBRARY_PATH" >> $GITHUB_ENV
          python -m pip install pyarrow
      - name: Setup Rust toolchain
        uses: ./.github/actions/setup-builder
        with:
          rust-version: stable
      - name: Run datafusion-common tests
        run: cargo test -p datafusion-common --features=pyarrow
# Re-runs ./regen.sh in datafusion/proto and fails if the output differs
# from what is checked in.
  vendor:
    name: Verify Vendored Code
    runs-on: ubuntu-latest
    container:
      image: amd64/rust
    steps:
      - uses: actions/checkout@v4
      - name: Setup Rust toolchain
        uses: ./.github/actions/setup-builder
      - name: Run gen
        working-directory: ./datafusion/proto
        run: ./regen.sh
      - name: Verify workspace clean (if this fails, run ./datafusion/proto/regen.sh and check in results)
        run: git diff --exit-code
# Formatting check, delegated to ci/scripts/rust_fmt.sh.
  check-fmt:
    name: Check cargo fmt
    runs-on: ubuntu-latest
    container:
      image: amd64/rust
    steps:
      - uses: actions/checkout@v4
      - name: Setup Rust toolchain
        uses: ./.github/actions/setup-builder
        with:
          rust-version: stable
      - name: Run
        # The generated proto source is overwritten with an empty line before
        # the format script runs.
        run: |
          echo '' > datafusion/proto/src/generated/datafusion.rs
          ci/scripts/rust_fmt.sh
# Coverage job disabled due to
# https://github.com/apache/datafusion/issues/3678
# coverage:
# name: coverage
# runs-on: ubuntu-latest
# steps:
# - uses: actions/checkout@v4
# with:
# submodules: true
# - name: Install protobuf compiler
# shell: bash
# run: |
# mkdir -p $HOME/d/protoc
# cd $HOME/d/protoc
# export PROTO_ZIP="protoc-21.4-linux-x86_64.zip"
# curl -LO https://github.com/protocolbuffers/protobuf/releases/download/v21.4/$PROTO_ZIP
# unzip $PROTO_ZIP
# export PATH=$PATH:$HOME/d/protoc/bin
# protoc --version
# - name: Setup Rust toolchain
# run: |
# rustup toolchain install stable
# rustup default stable
# rustup component add rustfmt clippy
# - name: Cache Cargo
# uses: actions/cache@v4
# with:
# path: /home/runner/.cargo
# # this key is not equal because the user is different than on a container (runner vs github)
# key: cargo-coverage-cache3-
# - name: Run coverage
# run: |
# export PATH=$PATH:$HOME/d/protoc/bin
# rustup toolchain install stable
# rustup default stable
# cargo install --version 0.20.1 cargo-tarpaulin
# cargo tarpaulin --all --out Xml
# - name: Report coverage
# continue-on-error: true
# run: bash <(curl -s https://codecov.io/bash)
# Lint run: installs the clippy component, then calls ci/scripts/rust_clippy.sh.
  clippy:
    name: clippy
    needs:
      - linux-build-lib
    runs-on: ubuntu-latest
    container:
      image: amd64/rust
    steps:
      - uses: actions/checkout@v4
        with:
          submodules: true
      - name: Setup Rust toolchain
        uses: ./.github/actions/setup-builder
        with:
          rust-version: stable
      - name: Install Clippy
        run: rustup component add clippy
      - name: Run clippy
        run: ci/scripts/rust_clippy.sh
# Check answers are correct when hash values collide: the tests under
# ./datafusion run with the `force_hash_collisions` feature enabled.
  hash-collisions:
    name: cargo test hash collisions (amd64)
    needs:
      - linux-build-lib
    runs-on: ubuntu-latest
    container:
      image: amd64/rust
    steps:
      - uses: actions/checkout@v4
        with:
          submodules: true
      - name: Setup Rust toolchain
        uses: ./.github/actions/setup-builder
        with:
          rust-version: stable
      - name: Run tests
        run: |
          cd datafusion
          cargo test --lib --tests --features=force_hash_collisions,avro
# Cargo.toml formatting check using taplo (`taplo format --check`).
  cargo-toml-formatting-checks:
    name: check Cargo.toml formatting
    needs:
      - linux-build-lib
    runs-on: ubuntu-latest
    container:
      image: amd64/rust
    steps:
      - uses: actions/checkout@v4
        with:
          submodules: true
      - name: Setup Rust toolchain
        uses: ./.github/actions/setup-builder
        with:
          rust-version: stable
      - name: Install taplo
        run: cargo +stable install taplo-cli --version ^0.9 --locked
      # if you encounter an error, try running 'taplo format' to fix the formatting automatically.
      - name: Check Cargo.toml formatting
        run: taplo format --check
# Regenerates the config and function docs with the dev/ scripts and fails
# when the regenerated files differ from what is checked in.
  config-docs-check:
    name: check configs.md and ***_functions.md is up-to-date
    needs:
      - linux-build-lib
    runs-on: ubuntu-latest
    container:
      image: amd64/rust
    steps:
      - uses: actions/checkout@v4
        with:
          submodules: true
      - name: Setup Rust toolchain
        uses: ./.github/actions/setup-builder
        with:
          rust-version: stable
      - uses: actions/setup-node@v4
        with:
          node-version: '20'
      - name: Check if configs.md has been modified
        run: |
          # If you encounter an error, run './dev/update_config_docs.sh' and commit
          ./dev/update_config_docs.sh
          git diff --exit-code
      - name: Check if any of the ***_functions.md has been modified
        run: |
          # If you encounter an error, run './dev/update_function_docs.sh' and commit
          ./dev/update_function_docs.sh
          git diff --exit-code
# Verify MSRV for the crates which are directly used by other projects:
# - datafusion
# - datafusion-substrait
# - datafusion-proto
# - datafusion-cli
# Each check runs `cargo msrv verify` from that crate's directory.
  msrv:
    name: Verify MSRV (Min Supported Rust Version)
    runs-on: ubuntu-latest
    container:
      image: amd64/rust
    steps:
      - uses: actions/checkout@v4
      - name: Setup Rust toolchain
        uses: ./.github/actions/setup-builder
      - name: Install cargo-msrv
        # `--locked` builds the tool with the dependency versions from its
        # published Cargo.lock, matching how taplo-cli is installed in the
        # cargo-toml-formatting-checks job.
        run: cargo install cargo-msrv --locked
      - name: Check datafusion
        working-directory: datafusion/core
        run: |
          # If you encounter an error with any of the commands below it means
          # your code or some crate in the dependency tree has a higher MSRV
          # (Min Supported Rust Version) than the one specified in the
          # `rust-version` key of `Cargo.toml`.
          #
          # To reproduce:
          # 1. Install the version of Rust that is failing. Example:
          # rustup install 1.79.0
          # 2. Run the command that failed with that version. Example:
          # cargo +1.79.0 check -p datafusion
          #
          # To resolve, either:
          # 1. Change your code to use older Rust features,
          # 2. Revert dependency update
          # 3. Update the MSRV version in `Cargo.toml`
          #
          # Please see the DataFusion Rust Version Compatibility Policy before
          # updating Cargo.toml. You may have to update the code instead.
          # https://github.com/apache/datafusion/blob/main/README.md#rust-version-compatibility-policy
          cargo msrv --output-format json --log-target stdout verify
      - name: Check datafusion-substrait
        working-directory: datafusion/substrait
        run: cargo msrv --output-format json --log-target stdout verify
      - name: Check datafusion-proto
        working-directory: datafusion/proto
        run: cargo msrv --output-format json --log-target stdout verify
      - name: Check datafusion-cli
        working-directory: datafusion-cli
        run: cargo msrv --output-format json --log-target stdout verify