ci: use platform-agnostic way to delete first line #1

Workflow file for this run

name: Backends
on:
push:
# Skip the backend suite if all changes are docs
paths-ignore:
- "docs/**"
- "mkdocs.yml"
- "**/*.md"
- "codecov.yml"
branches:
- master
- "*.x.x"
pull_request:
# Skip the backend suite if all changes are docs
paths-ignore:
- "docs/**"
- "mkdocs.yml"
- "**/*.md"
- "codecov.yml"
branches:
- master
- "*.x.x"
merge_group:
permissions:
# this allows extractions/setup-just to list releases for `just` at a higher
# rate limit while restricting GITHUB_TOKEN permissions elsewhere
contents: read
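# With cancel-in-progress enabled below, a newer run for the same head ref (or
# commit SHA, for non-PR events) supersedes any run of this workflow that is
# still in flight for that ref.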
concurrency:
group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }}
cancel-in-progress: true
env:
FORCE_COLOR: "1"
jobs:
gen_lockfile_backends:
name: Generate Poetry Lockfile for non-Snowflake Backends
runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
python-version:
- "3.9"
- "3.10" # For PyFlink, which does not support Python 3.11 yet
- "3.11"
steps:
- name: checkout
uses: actions/checkout@v3
- name: install python
id: install_python
uses: actions/setup-python@v4
with:
python-version: "${{ matrix.python-version }}"
- name: install poetry
run: python -m pip install --upgrade pip 'poetry<1.4'
- uses: syphar/restore-pip-download-cache@v1
with:
requirement_files: poetry.lock
custom_cache_key_element: ${{ steps.install_python.outputs.python-version }}
- name: remove snowflake deps that are not compatible with everything else
run: poetry remove snowflake-sqlalchemy snowflake-connector-python
- name: update deps originally constrained by snowflake
run: poetry update numpy pandas pyarrow datafusion
- name: upload deps file
uses: actions/upload-artifact@v3
with:
name: backend-deps-${{ matrix.python-version }}
path: |
pyproject.toml
poetry.lock
test_backends:
name: ${{ matrix.backend.title }} ${{ matrix.os }} python-${{ matrix.python-version }}
runs-on: ${{ matrix.os }}
needs:
- gen_lockfile_backends
env:
SQLALCHEMY_WARN_20: "1"
strategy:
fail-fast: false
matrix:
os:
- ubuntu-latest
- windows-latest
python-version:
- "3.9"
- "3.11"
backend:
- name: dask
title: Dask
extras:
- dask
- name: duckdb
title: DuckDB
extras:
- duckdb
- deltalake
additional_deps:
- torch
- name: pandas
title: Pandas
extras:
- pandas
- name: sqlite
title: SQLite
extras:
- sqlite
- name: datafusion
title: DataFusion
extras:
- datafusion
- name: polars
title: Polars
extras:
- polars
- deltalake
- name: mysql
title: MySQL
services:
- mysql
extras:
- mysql
- geospatial
sys-deps:
- libgeos-dev
- name: clickhouse
title: ClickHouse
services:
- clickhouse
extras:
- clickhouse
- name: postgres
title: PostgreSQL
extras:
- postgres
- geospatial
services:
- postgres
sys-deps:
- libgeos-dev
- name: postgres
title: PostgreSQL + Torch
extras:
- postgres
- geospatial
additional_deps:
- torch
services:
- postgres
sys-deps:
- libgeos-dev
- name: impala
title: Impala
serial: true
extras:
- impala
services:
- impala
- kudu
sys-deps:
- cmake
- ninja-build
- name: mssql
title: MS SQL Server
serial: true
extras:
- mssql
services:
- mssql
sys-deps:
- libkrb5-dev
- krb5-config
- freetds-dev
- name: trino
title: Trino
extras:
- trino
- postgres
services:
- trino
- name: druid
title: Druid
extras:
- druid
services:
- druid
- name: oracle
title: Oracle
serial: true
extras:
- oracle
services:
- oracle
- name: flink
title: Flink
serial: true
extras:
- flink
additional_deps:
- apache-flink
even_more_deps:
- pandas~=1.5
exclude:
- os: windows-latest
backend:
name: mysql
title: MySQL
extras:
- mysql
- geospatial
services:
- mysql
sys-deps:
- libgeos-dev
- os: windows-latest
backend:
name: clickhouse
title: ClickHouse
extras:
- clickhouse
services:
- clickhouse
- os: windows-latest
backend:
name: postgres
title: PostgreSQL
extras:
- postgres
- geospatial
services:
- postgres
sys-deps:
- libgeos-dev
- os: windows-latest
backend:
name: postgres
title: PostgreSQL + Torch
extras:
- postgres
- geospatial
additional_deps:
- torch
services:
- postgres
sys-deps:
- libgeos-dev
- os: windows-latest
backend:
name: impala
title: Impala
serial: true
extras:
- impala
services:
- impala
- kudu
sys-deps:
- cmake
- ninja-build
- os: windows-latest
backend:
name: mssql
title: MS SQL Server
serial: true
extras:
- mssql
services:
- mssql
sys-deps:
- libkrb5-dev
- krb5-config
- freetds-dev
- os: windows-latest
backend:
name: trino
title: Trino
services:
- trino
extras:
- trino
- postgres
- os: windows-latest
backend:
name: druid
title: Druid
extras:
- druid
services:
- druid
- os: windows-latest
backend:
name: oracle
title: Oracle
serial: true
extras:
- oracle
services:
- oracle
steps:
- name: update and install system dependencies
if: matrix.os == 'ubuntu-latest' && matrix.backend.sys-deps != null
run: |
set -euo pipefail
sudo apt-get update -qq -y
sudo apt-get install -qq -y build-essential ${{ join(matrix.backend.sys-deps, ' ') }}
- name: install sqlite
if: matrix.os == 'windows-latest' && matrix.backend.name == 'sqlite'
run: choco install sqlite
- name: checkout
uses: actions/checkout@v3
- uses: extractions/setup-just@v1
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- name: download backend data
run: just download-data
- name: start services
if: matrix.backend.services != null
run: docker compose up --wait ${{ join(matrix.backend.services, ' ') }}
- name: install python
uses: actions/setup-python@v4
id: install_python
with:
python-version: ${{ matrix.python-version }}
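# The next two steps strip the leading header row from the Flink test CSVs:
# the Ubuntu variant uses a `python -c` one-liner and the Windows variant a
# PowerShell here-string (a single cross-platform sketch follows the Windows step).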
- name: delete header rows on ubuntu
if: matrix.os == 'ubuntu-latest' && matrix.backend.name == 'flink'
run: |
python -c "exec(\"TEST_FILES = ['functional_alltypes', 'diamonds', 'batting', 'awards_players']\nfor file in TEST_FILES:\n with open(f'ci/ibis-testing-data/csv/{file}.csv', 'r+') as f:\n f.readline()\n data=f.read()\n f.seek(0)\n f.write(data)\n f.truncate()\n\")"
- name: delete header rows on windows
if: matrix.os == 'windows-latest' && matrix.backend.name == 'flink'
run: |
python -c @"
TEST_FILES = ["functional_alltypes", "diamonds", "batting", "awards_players"]

[GitHub Actions check failure annotation on line 350 of .github/workflows/ibis-backends.yml: "Invalid workflow file: You have an error in your yaml syntax on line 350"]
for file in TEST_FILES:
with open(f"ci/ibis-testing-data/csv/{file}.csv", "r+") as f:
f.readline()
data=f.read()
f.seek(0)
f.write(data)
f.truncate()
"@
- name: download poetry lockfile
uses: actions/download-artifact@v3
with:
name: backend-deps-${{ matrix.python-version }}
path: deps
- name: pull out lockfile
shell: bash
run: |
set -euo pipefail
mv -f deps/* .
rm -r deps
- uses: syphar/restore-pip-download-cache@v1
with:
requirement_files: poetry.lock
custom_cache_key_element: ${{ steps.install_python.outputs.python-version }}
- name: install poetry
run: python -m pip install --upgrade pip 'poetry<1.4'
- uses: syphar/restore-virtualenv@v1
with:
requirement_files: poetry.lock
custom_cache_key_element: ${{ matrix.backend.name }}-${{ steps.install_python.outputs.python-version }}
- name: install ibis
run: poetry install --without dev --without docs --extras "${{ join(matrix.backend.extras, ' ') }}"
- name: install other deps
if: matrix.backend.additional_deps != null
run: poetry run pip install "${{ join(matrix.backend.additional_deps, ' ') }}"
# FIXME(deepyaman)
- name: install even more deps
if: matrix.backend.even_more_deps != null
run: poetry run pip install "${{ join(matrix.backend.even_more_deps, ' ') }}"
- name: show installed deps
run: poetry run pip list
- name: "run parallel tests: ${{ matrix.backend.name }}"
if: ${{ !matrix.backend.serial }}
run: just ci-check -m ${{ matrix.backend.name }} --numprocesses auto --dist=loadgroup
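# (pytest-xdist flags: `--numprocesses auto` starts one worker per CPU core and
# `--dist=loadgroup` keeps tests sharing an `xdist_group` mark on the same worker)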
- name: "run serial tests: ${{ matrix.backend.name }}"
if: matrix.backend.serial && matrix.backend.name == 'impala'
run: just ci-check -m ${{ matrix.backend.name }} --randomly-dont-reorganize
env:
IBIS_TEST_NN_HOST: localhost
IBIS_TEST_IMPALA_HOST: localhost
IBIS_TEST_IMPALA_PORT: 21050
IBIS_TEST_WEBHDFS_PORT: 50070
IBIS_TEST_WEBHDFS_USER: hdfs
IBIS_EXAMPLES_DATA: ${{ runner.temp }}/examples-${{ matrix.backend.name }}-${{ matrix.os }}-${{ steps.install_python.outputs.python-version }}
- name: "run serial tests: ${{ matrix.backend.name }}"
if: matrix.backend.serial && matrix.backend.name != 'impala'
run: just ci-check -m ${{ matrix.backend.name }} -v
env:
IBIS_EXAMPLES_DATA: ${{ runner.temp }}/examples-${{ matrix.backend.name }}-${{ matrix.os }}-${{ steps.install_python.outputs.python-version }}
- name: check that no untracked files were produced
shell: bash
run: git checkout poetry.lock pyproject.toml && ! git status --porcelain | tee /dev/stderr | grep .
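# (the check above first restores the lockfile edits made during the job, then
# fails if `git status --porcelain` prints anything, i.e. if any tracked file
# changed or an untracked file was created)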
- name: upload code coverage
if: success()
uses: codecov/codecov-action@v3
with:
flags: backend,${{ matrix.backend.name }},${{ runner.os }},python-${{ steps.install_python.outputs.python-version }}
- name: Show docker compose logs on fail
if: matrix.backend.services != null && failure()
run: docker compose logs
test_backends_min_version:
name: ${{ matrix.backend.title }} Min Version ${{ matrix.os }} python-${{ matrix.python-version }}
runs-on: ${{ matrix.os }}
env:
SQLALCHEMY_WARN_20: "1"
strategy:
fail-fast: false
matrix:
os:
- ubuntu-latest
- windows-latest
python-version:
- "3.9"
- "3.11"
backend:
- name: dask
title: Dask
deps:
- "dask[array,dataframe]@2022.9.1"
- "[email protected]"
extras:
- dask
- name: postgres
title: PostgreSQL
deps:
- "[email protected]"
- "[email protected]"
- "[email protected]"
- "Shapely@2"
services:
- postgres
extras:
- postgres
- geospatial
exclude:
- os: windows-latest
backend:
name: postgres
title: PostgreSQL
deps:
- "[email protected]"
- "[email protected]"
- "[email protected]"
- "Shapely@2"
services:
- postgres
extras:
- postgres
- geospatial
- python-version: "3.11"
backend:
name: postgres
title: PostgreSQL
deps:
- "[email protected]"
- "[email protected]"
- "[email protected]"
- "Shapely@2"
services:
- postgres
extras:
- postgres
- geospatial
steps:
- name: checkout
uses: actions/checkout@v3
- name: install libgeos for shapely
if: matrix.backend.name == 'postgres'
run: sudo apt-get install -qq -y build-essential libgeos-dev
- uses: extractions/setup-just@v1
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- name: download backend data
run: just download-data
- name: start services
if: matrix.backend.services != null
run: docker compose up --wait ${{ join(matrix.backend.services, ' ') }}
- name: install python
uses: actions/setup-python@v4
id: install_python
with:
python-version: ${{ matrix.python-version }}
- name: install poetry
run: python -m pip install --upgrade pip 'poetry<1.4'
- name: install minimum versions
run: poetry add --lock --optional ${{ join(matrix.backend.deps, ' ') }}
- name: checkout the lock file
run: git checkout poetry.lock
- name: lock with no updates
# `poetry add` is aggressive and will update other dependencies such as
# numpy and pandas, so we keep the pyproject.toml edits and then re-lock
# without updating anything except the requested versions
run: poetry lock --no-update
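# Net effect of the three steps above: the pins land in pyproject.toml, the
# lockfile churn from `poetry add` is discarded, and a fresh lock honours only
# the requested pins. A sketch of the equivalent local commands, with <pins>
# standing in for the matrix backend deps:
#   poetry add --lock --optional <pins>
#   git checkout poetry.lock
#   poetry lock --no-update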
- name: install ibis
run: poetry install --without dev --without docs --extras "${{ join(matrix.backend.extras, ' ') }}"
- name: run tests
run: just ci-check -m ${{ matrix.backend.name }} --numprocesses auto --dist=loadgroup
- name: check that no untracked files were produced
shell: bash
run: git checkout poetry.lock pyproject.toml && ! git status --porcelain | tee /dev/stderr | grep .
- name: upload code coverage
if: success()
uses: codecov/codecov-action@v3
with:
flags: backend,${{ matrix.backend.name }},${{ runner.os }},python-${{ steps.install_python.outputs.python-version }}
- name: Show docker compose logs on fail
if: matrix.backend.services != null && failure()
run: docker compose logs
test_pyspark:
name: PySpark ${{ matrix.os }} python-${{ matrix.python-version }} pandas-${{ matrix.pandas.version }}
runs-on: ${{ matrix.os }}
strategy:
fail-fast: false
matrix:
os:
- ubuntu-latest
python-version:
- "3.10"
pandas:
- version: "1.5.*"
- version: "2.*.*"
conflicts:
- snowflake-sqlalchemy
- snowflake-connector-python
include:
- os: ubuntu-latest
python-version: "3.9"
pandas:
version: "1.5.*"
- os: ubuntu-latest
python-version: "3.11"
pandas:
version: "2.*.*"
conflicts:
- snowflake-sqlalchemy
- snowflake-connector-python
steps:
- name: checkout
uses: actions/checkout@v3
- uses: actions/setup-java@v3
with:
distribution: microsoft
java-version: 17
- uses: extractions/setup-just@v1
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- name: download backend data
run: just download-data
- name: install python
uses: actions/setup-python@v4
id: install_python
with:
python-version: ${{ matrix.python-version }}
- name: install poetry
run: python -m pip install --upgrade pip 'poetry<1.4'
- name: remove conflicting deps
if: matrix.pandas.conflicts != null
run: poetry remove ${{ join(matrix.pandas.conflicts, ' ') }}
- name: install minimum versions
run: poetry add --lock 'pandas@${{ matrix.pandas.version }}' '[email protected].*'
- name: checkout the lock file
run: git checkout poetry.lock
- name: lock with no updates
# `poetry add` is aggressive and will update other dependencies such as
# numpy and pandas, so we keep the pyproject.toml edits and then re-lock
# without updating anything except the requested versions
run: poetry lock --no-update
- name: install ibis
run: poetry install --without dev --without docs --extras pyspark
- name: run tests
run: just ci-check -m pyspark
- name: check that no untracked files were produced
shell: bash
run: git checkout poetry.lock pyproject.toml && ! git status --porcelain | tee /dev/stderr | grep .
- name: upload code coverage
if: success()
uses: codecov/codecov-action@v3
with:
flags: backend,pyspark,${{ runner.os }},python-${{ steps.install_python.outputs.python-version }},pandas-${{ matrix.pandas.version }}
gen_lockfile_sqlalchemy2:
name: Generate Poetry Lockfile for SQLAlchemy 2
runs-on: ubuntu-latest
steps:
- name: checkout
uses: actions/checkout@v3
- name: install python
uses: actions/setup-python@v4
with:
python-version: "3.11"
- run: python -m pip install --upgrade pip 'poetry<1.4'
- name: remove deps that are not compatible with sqlalchemy 2
run: poetry remove snowflake-sqlalchemy
- name: add sqlalchemy 2
run: poetry add --lock --optional 'sqlalchemy>=2,<3'
- name: checkout the lock file
run: git checkout poetry.lock
- name: lock with no updates
# `poetry add` is aggressive and will update other dependencies such as
# numpy and pandas, so we keep the pyproject.toml edits and then re-lock
# without updating anything except the requested versions
run: poetry lock --no-update
- name: check the sqlalchemy version
run: poetry show sqlalchemy --no-ansi | grep version | cut -d ':' -f2- | sed 's/ //g' | grep -P '^2\.'
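# (the trailing `grep -P '^2\.'` makes this step fail unless the locked
# sqlalchemy version starts with "2.")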
- name: upload deps file
uses: actions/upload-artifact@v3
with:
name: deps
path: |
pyproject.toml
poetry.lock
test_backends_sqlalchemy2:
name: SQLAlchemy 2 ${{ matrix.backend.title }} ${{ matrix.os }} python-${{ matrix.python-version }}
runs-on: ${{ matrix.os }}
needs: gen_lockfile_sqlalchemy2
strategy:
fail-fast: false
matrix:
os:
- ubuntu-latest
python-version:
- "3.11"
backend:
- name: mssql
title: MS SQL Server
services:
- mssql
extras:
- mssql
- name: mysql
title: MySQL
services:
- mysql
extras:
- geospatial
- mysql
- name: postgres
title: PostgreSQL
services:
- postgres
extras:
- geospatial
- postgres
- name: sqlite
title: SQLite
extras:
- sqlite
- name: trino
title: Trino
services:
- trino
extras:
- trino
- postgres
- name: duckdb
title: DuckDB
extras:
- duckdb
- name: oracle
title: Oracle
serial: true
extras:
- oracle
services:
- oracle
steps:
- name: checkout
uses: actions/checkout@v3
- name: install libgeos for shapely
if: ${{ matrix.backend.name == 'postgres' }}
run: sudo apt-get install -qq -y build-essential libgeos-dev
- name: install freetds-dev for mssql
if: ${{ matrix.backend.name == 'mssql' }}
run: sudo apt-get install -qq -y build-essential libkrb5-dev krb5-config freetds-dev
- uses: extractions/setup-just@v1
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- name: download backend data
run: just download-data
- name: start services
if: matrix.backend.services != null
run: docker compose up --wait ${{ join(matrix.backend.services, ' ') }}
- name: install python
uses: actions/setup-python@v4
id: install_python
with:
python-version: ${{ matrix.python-version }}
- name: download poetry lockfile
uses: actions/download-artifact@v3
with:
name: deps
path: deps
- name: pull out lockfile
run: |
set -euo pipefail
mv -f deps/* .
rm -r deps
- uses: syphar/restore-virtualenv@v1
with:
requirement_files: poetry.lock
custom_cache_key_element: ${{ matrix.backend.name }}-${{ steps.install_python.outputs.python-version }}
- uses: syphar/restore-pip-download-cache@v1
with:
requirement_files: poetry.lock
custom_cache_key_element: ${{ steps.install_python.outputs.python-version }}
- name: install poetry
run: python -m pip install --upgrade pip 'poetry<1.4'
- name: install ibis
run: poetry install --without dev --without docs --extras "${{ join(matrix.backend.extras, ' ') }}"
- name: run tests
run: just ci-check -m ${{ matrix.backend.name }} --numprocesses auto --dist=loadgroup
- name: check that no untracked files were produced
shell: bash
run: git checkout poetry.lock pyproject.toml && ! git status --porcelain | tee /dev/stderr | grep .
- name: upload code coverage
if: success()
uses: codecov/codecov-action@v3
with:
flags: backend,${{ matrix.backend.name }},${{ runner.os }},python-${{ steps.install_python.outputs.python-version }}
backends:
# this job exists so that we can use a single job from this workflow to gate merging
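# (branch protection can then require just this one status check instead of
# every matrix combination above)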
runs-on: ubuntu-latest
needs:
- test_backends_min_version
- test_backends
- test_backends_sqlalchemy2
- test_pyspark
steps:
- run: exit 0