Skip to content

[feat] Support cache ListFiles result cache in session level #19007

[feat] Support cache ListFiles result cache in session level

[feat] Support cache ListFiles result cache in session level #19007

Workflow file for this run

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
name: Rust
concurrency:
group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }}
cancel-in-progress: true
on:
push:
paths-ignore:
- "docs/**"
- "**.md"
- ".github/ISSUE_TEMPLATE/**"
- ".github/pull_request_template.md"
pull_request:
paths-ignore:
- "docs/**"
- "**.md"
- ".github/ISSUE_TEMPLATE/**"
- ".github/pull_request_template.md"
# manual trigger
# https://docs.github.com/en/actions/managing-workflow-runs/manually-running-a-workflow
workflow_dispatch:
jobs:
# Check crate compiles
linux-build-lib:
name: cargo check
runs-on: ubuntu-latest
container:
image: amd64/rust
steps:
- uses: actions/checkout@v4
- name: Setup Rust toolchain
uses: ./.github/actions/setup-builder
with:
rust-version: stable
- name: Cache Cargo
uses: actions/cache@v3
with:
path: |
~/.cargo/bin/
~/.cargo/registry/index/
~/.cargo/registry/cache/
~/.cargo/git/db/
./target/
./datafusion-cli/target/
# this key equals the ones on `linux-build-lib` for re-use
key: cargo-cache-benchmark-${{ hashFiles('datafusion/**/Cargo.toml', 'benchmarks/Cargo.toml', 'datafusion-cli/Cargo.toml') }}
- name: Check workspace without default features
run: cargo check --no-default-features -p datafusion
- name: Check workspace in debug mode
run: cargo check
- name: Check workspace with all features
run: cargo check --workspace --benches --features avro,json
- name: Check Cargo.lock for datafusion-cli
run: |
# If this test fails, try running `cargo update` in the `datafusion-cli` directory
# and check in the updated Cargo.lock file.
cargo check --manifest-path datafusion-cli/Cargo.toml --locked
# test the crate
linux-test:
name: cargo test (amd64)
needs: [ linux-build-lib ]
runs-on: ubuntu-latest
container:
image: amd64/rust
steps:
- uses: actions/checkout@v4
with:
submodules: true
- name: Setup Rust toolchain
uses: ./.github/actions/setup-builder
with:
rust-version: stable
- name: Run tests (excluding doctests)
run: cargo test --lib --tests --bins --features avro,json,backtrace
- name: Verify Working Directory Clean
run: git diff --exit-code
linux-test-datafusion-cli:
name: cargo test datafusion-cli (amd64)
needs: [ linux-build-lib ]
runs-on: ubuntu-latest
container:
image: amd64/rust
steps:
- uses: actions/checkout@v4
with:
submodules: true
- name: Setup Rust toolchain
uses: ./.github/actions/setup-builder
with:
rust-version: stable
- name: Run tests (excluding doctests)
run: |
cd datafusion-cli
cargo test --lib --tests --bins --all-features
- name: Verify Working Directory Clean
run: git diff --exit-code
linux-test-example:
name: cargo examples (amd64)
needs: [ linux-build-lib ]
runs-on: ubuntu-latest
container:
image: amd64/rust
steps:
- uses: actions/checkout@v4
with:
submodules: true
- name: Setup Rust toolchain
uses: ./.github/actions/setup-builder
with:
rust-version: stable
- name: Run examples
run: |
# test datafusion-sql examples
cargo run --example sql
# test datafusion-examples
cargo run --example avro_sql --features=datafusion/avro
cargo run --example csv_sql
cargo run --example custom_datasource
cargo run --example dataframe
cargo run --example dataframe_in_memory
cargo run --example deserialize_to_struct
cargo run --example expr_api
cargo run --example parquet_sql
cargo run --example parquet_sql_multiple_files
cargo run --example memtable
cargo run --example rewrite_expr
cargo run --example simple_udf
cargo run --example simple_udaf
- name: Verify Working Directory Clean
run: git diff --exit-code
# Run `cargo test doc` (test documentation examples)
linux-test-doc:
name: cargo test doc (amd64)
needs: [ linux-build-lib ]
runs-on: ubuntu-latest
container:
image: amd64/rust
steps:
- uses: actions/checkout@v4
with:
submodules: true
- name: Setup Rust toolchain
uses: ./.github/actions/setup-builder
with:
rust-version: stable
- name: Run doctests
run: |
cargo test --doc --features avro,json
cd datafusion-cli
cargo test --doc --all-features
- name: Verify Working Directory Clean
run: git diff --exit-code
# Run `cargo doc` to ensure the rustdoc is clean
linux-rustdoc:
name: cargo doc
needs: [ linux-build-lib ]
runs-on: ubuntu-latest
container:
image: amd64/rust
steps:
- uses: actions/checkout@v4
- name: Setup Rust toolchain
uses: ./.github/actions/setup-builder
with:
rust-version: stable
- name: Run cargo doc
run: |
export RUSTDOCFLAGS="-D warnings -A rustdoc::private-intra-doc-links"
cargo doc --document-private-items --no-deps --workspace
cd datafusion-cli
cargo doc --document-private-items --no-deps
linux-wasm-pack:
name: build with wasm-pack
runs-on: ubuntu-latest
container:
image: amd64/rust
steps:
- uses: actions/checkout@v4
- name: Setup Rust toolchain
uses: ./.github/actions/setup-builder
with:
rust-version: stable
- name: Install wasm-pack
run: curl https://rustwasm.github.io/wasm-pack/installer/init.sh -sSf | sh
- name: Build with wasm-pack
working-directory: ./datafusion/wasmtest
run: wasm-pack build --dev
# verify that the benchmark queries return the correct results
verify-benchmark-results:
name: verify benchmark results (amd64)
needs: [ linux-build-lib ]
runs-on: ubuntu-latest
container:
image: amd64/rust
steps:
- uses: actions/checkout@v4
with:
submodules: true
- name: Setup Rust toolchain
uses: ./.github/actions/setup-builder
with:
rust-version: stable
- name: Generate benchmark data and expected query results
run: |
mkdir -p datafusion/sqllogictest/test_files/tpch/data
git clone https://github.com/databricks/tpch-dbgen.git
cd tpch-dbgen
make
./dbgen -f -s 0.1
mv *.tbl ../datafusion/sqllogictest/test_files/tpch/data
- name: Verify that benchmark queries return expected results
run: |
export TPCH_DATA=`realpath datafusion/sqllogictest/test_files/tpch/data`
# use release build for plan verificaton because debug build causes stack overflow
cargo test plan_q --package datafusion-benchmarks --profile release-nonlto --features=ci -- --test-threads=1
INCLUDE_TPCH=true cargo test --test sqllogictests
- name: Verify Working Directory Clean
run: git diff --exit-code
sqllogictest-postgres:
name: "Run sqllogictest with Postgres runner"
needs: [ linux-build-lib ]
runs-on: ubuntu-latest
services:
postgres:
image: postgres:15
env:
POSTGRES_PASSWORD: postgres
POSTGRES_DB: db_test
POSTGRES_INITDB_ARGS: --encoding=UTF-8 --lc-collate=C --lc-ctype=C
ports:
- 5432/tcp
options: >-
--health-cmd pg_isready
--health-interval 10s
--health-timeout 5s
--health-retries 5
steps:
- uses: actions/checkout@v4
with:
submodules: true
- name: Setup toolchain
run: |
rustup toolchain install stable
rustup default stable
- name: Run sqllogictest
run: PG_COMPAT=true PG_URI="postgresql://postgres:postgres@localhost:$POSTGRES_PORT/db_test" cargo test --features=postgres --test sqllogictests
env:
POSTGRES_PORT: ${{ job.services.postgres.ports[5432] }}
windows:
name: cargo test (win64)
runs-on: windows-latest
steps:
- uses: actions/checkout@v4
with:
submodules: true
- name: Install protobuf compiler
shell: bash
run: |
mkdir -p $HOME/d/protoc
cd $HOME/d/protoc
export PROTO_ZIP="protoc-21.4-win64.zip"
curl -LO https://github.com/protocolbuffers/protobuf/releases/download/v21.4/$PROTO_ZIP
unzip $PROTO_ZIP
export PATH=$PATH:$HOME/d/protoc/bin
protoc.exe --version
# TODO: this won't cache anything, which is expensive. Setup this action
# with a OS-dependent path.
- name: Setup Rust toolchain
run: |
rustup toolchain install stable
rustup default stable
rustup component add rustfmt
- name: Run tests (excluding doctests)
shell: bash
run: |
export PATH=$PATH:$HOME/d/protoc/bin
cargo test --lib --tests --bins --features avro,json,backtrace
cd datafusion-cli
cargo test --lib --tests --bins --all-features
env:
# do not produce debug symbols to keep memory usage down
RUSTFLAGS: "-C debuginfo=0"
RUST_BACKTRACE: "1"
macos:
name: cargo test (mac)
runs-on: macos-latest
steps:
- uses: actions/checkout@v4
with:
submodules: true
- name: Install protobuf compiler
shell: bash
run: |
mkdir -p $HOME/d/protoc
cd $HOME/d/protoc
export PROTO_ZIP="protoc-21.4-osx-x86_64.zip"
curl -LO https://github.com/protocolbuffers/protobuf/releases/download/v21.4/$PROTO_ZIP
unzip $PROTO_ZIP
echo "$HOME/d/protoc/bin" >> $GITHUB_PATH
export PATH=$PATH:$HOME/d/protoc/bin
protoc --version
# TODO: this won't cache anything, which is expensive. Setup this action
# with a OS-dependent path.
- name: Setup Rust toolchain
run: |
rustup toolchain install stable
rustup default stable
rustup component add rustfmt
- name: Run tests (excluding doctests)
shell: bash
run: |
cargo test --lib --tests --bins --features avro,json,backtrace
cd datafusion-cli
cargo test --lib --tests --bins --all-features
env:
# do not produce debug symbols to keep memory usage down
RUSTFLAGS: "-C debuginfo=0"
RUST_BACKTRACE: "1"
test-datafusion-pyarrow:
name: cargo test pyarrow (amd64)
needs: [ linux-build-lib ]
runs-on: ubuntu-20.04
container:
image: amd64/rust:bullseye # Workaround https://github.com/actions/setup-python/issues/721
steps:
- uses: actions/checkout@v4
with:
submodules: true
- uses: actions/setup-python@v4
with:
python-version: "3.8"
- name: Install PyArrow
run: |
echo "LIBRARY_PATH=$LD_LIBRARY_PATH" >> $GITHUB_ENV
python -m pip install pyarrow
- name: Setup Rust toolchain
uses: ./.github/actions/setup-builder
with:
rust-version: stable
- name: Run datafusion-common tests
run: cargo test -p datafusion-common --features=pyarrow
vendor:
name: Verify Vendored Code
runs-on: ubuntu-latest
container:
image: amd64/rust
steps:
- uses: actions/checkout@v4
- name: Setup Rust toolchain
uses: ./.github/actions/setup-builder
- name: Run gen
run: ./regen.sh
working-directory: ./datafusion/proto
- name: Verify workspace clean (if this fails, run ./datafusion/proto/regen.sh and check in results)
run: git diff --exit-code
check-fmt:
name: Check cargo fmt
runs-on: ubuntu-latest
container:
image: amd64/rust
steps:
- uses: actions/checkout@v4
- name: Setup Rust toolchain
uses: ./.github/actions/setup-builder
with:
rust-version: stable
- name: Run
run: |
echo '' > datafusion/proto/src/generated/datafusion.rs
ci/scripts/rust_fmt.sh
# Coverage job disabled due to
# https://github.com/apache/arrow-datafusion/issues/3678
# coverage:
# name: coverage
# runs-on: ubuntu-latest
# steps:
# - uses: actions/checkout@v4
# with:
# submodules: true
# - name: Install protobuf compiler
# shell: bash
# run: |
# mkdir -p $HOME/d/protoc
# cd $HOME/d/protoc
# export PROTO_ZIP="protoc-21.4-linux-x86_64.zip"
# curl -LO https://github.com/protocolbuffers/protobuf/releases/download/v21.4/$PROTO_ZIP
# unzip $PROTO_ZIP
# export PATH=$PATH:$HOME/d/protoc/bin
# protoc --version
# - name: Setup Rust toolchain
# run: |
# rustup toolchain install stable
# rustup default stable
# rustup component add rustfmt clippy
# - name: Cache Cargo
# uses: actions/cache@v3
# with:
# path: /home/runner/.cargo
# # this key is not equal because the user is different than on a container (runner vs github)
# key: cargo-coverage-cache3-
# - name: Run coverage
# run: |
# export PATH=$PATH:$HOME/d/protoc/bin
# rustup toolchain install stable
# rustup default stable
# cargo install --version 0.20.1 cargo-tarpaulin
# cargo tarpaulin --all --out Xml
# - name: Report coverage
# continue-on-error: true
# run: bash <(curl -s https://codecov.io/bash)
clippy:
name: clippy
needs: [ linux-build-lib ]
runs-on: ubuntu-latest
container:
image: amd64/rust
steps:
- uses: actions/checkout@v4
with:
submodules: true
- name: Setup Rust toolchain
uses: ./.github/actions/setup-builder
with:
rust-version: stable
- name: Install Clippy
run: rustup component add clippy
- name: Run clippy
run: ci/scripts/rust_clippy.sh
# Check answers are correct when hash values collide
hash-collisions:
name: cargo test hash collisions (amd64)
needs: [ linux-build-lib ]
runs-on: ubuntu-latest
container:
image: amd64/rust
steps:
- uses: actions/checkout@v4
with:
submodules: true
- name: Setup Rust toolchain
uses: ./.github/actions/setup-builder
with:
rust-version: stable
- name: Run tests
run: |
cd datafusion
cargo test --lib --tests --features=force_hash_collisions,avro
cargo-toml-formatting-checks:
name: check Cargo.toml formatting
needs: [ linux-build-lib ]
runs-on: ubuntu-latest
container:
image: amd64/rust
steps:
- uses: actions/checkout@v4
with:
submodules: true
- name: Setup Rust toolchain
uses: ./.github/actions/setup-builder
with:
rust-version: stable
- name: Install cargo-tomlfmt
run: which cargo-tomlfmt || cargo install cargo-tomlfmt
- name: Check Cargo.toml formatting
run: |
# if you encounter error, try rerun the command below, finally run 'git diff' to
# check which Cargo.toml introduces formatting violation
#
# ignore ./Cargo.toml because putting workspaces in multi-line lists make it easy to read
ci/scripts/rust_toml_fmt.sh
git diff --exit-code
config-docs-check:
name: check configs.md is up-to-date
needs: [ linux-build-lib ]
runs-on: ubuntu-latest
container:
image: amd64/rust
steps:
- uses: actions/checkout@v4
with:
submodules: true
- name: Setup Rust toolchain
uses: ./.github/actions/setup-builder
with:
rust-version: stable
- uses: actions/setup-node@v3
with:
node-version: "14"
- name: Check if configs.md has been modified
run: |
# If you encounter an error, run './dev/update_config_docs.sh' and commit
./dev/update_config_docs.sh
git diff --exit-code
# Verify MSRV for the crates which are directly used by other projects.
msrv:
name: Verify MSRV
runs-on: ubuntu-latest
container:
image: amd64/rust
steps:
- uses: actions/checkout@v4
- name: Setup Rust toolchain
uses: ./.github/actions/setup-builder
- name: Install cargo-msrv
run: cargo install cargo-msrv
- name: Check datafusion
working-directory: datafusion/core
run: cargo msrv verify
- name: Check datafusion-substrait
working-directory: datafusion/substrait
run: cargo msrv verify
- name: Check datafusion-proto
working-directory: datafusion/proto
run: cargo msrv verify
- name: Check datafusion-cli
working-directory: datafusion-cli
run: cargo msrv verify