diff --git a/.github/dependabot.yml b/.github/dependabot.yml index 010dcd14..444a5c5c 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -100,6 +100,11 @@ updates: schedule: interval: "daily" + - directory: "/application/records" + package-ecosystem: "pip" + schedule: + interval: "daily" + # Frameworks. - directory: "/framework/dbt/basic" @@ -112,6 +117,11 @@ updates: schedule: interval: "daily" + - directory: "/framework/records" + package-ecosystem: "pip" + schedule: + interval: "daily" + - directory: "/framework/streamlit" package-ecosystem: "pip" schedule: diff --git a/.github/workflows/application-records.yml b/.github/workflows/application-records.yml new file mode 100644 index 00000000..8b7ced85 --- /dev/null +++ b/.github/workflows/application-records.yml @@ -0,0 +1,74 @@ +name: records (application) + +on: + pull_request: + branches: ~ + paths: + - '.github/workflows/application-records.yml' + - 'application/records/**' + - '/requirements.txt' + push: + branches: [ main ] + paths: + - '.github/workflows/application-records.yml' + - 'application/records/**' + - '/requirements.txt' + + # Allow job to be triggered manually. + workflow_dispatch: + + # Run job each night after CrateDB nightly has been published. + schedule: + - cron: '0 3 * * *' + +# Cancel in-progress jobs when pushing to the same branch. 
+concurrency: + cancel-in-progress: true + group: ${{ github.workflow }}-${{ github.ref }} + +jobs: + test: + name: " + Python: ${{ matrix.python-version }} + CrateDB: ${{ matrix.cratedb-version }} + on ${{ matrix.os }}" + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: [ 'ubuntu-latest' ] + python-version: [ '3.9', '3.13' ] + cratedb-version: [ 'nightly' ] + + services: + cratedb: + image: crate/crate:${{ matrix.cratedb-version }} + ports: + - 4200:4200 + - 5432:5432 + env: + CRATE_HEAP_SIZE: 4g + + steps: + + - name: Acquire sources + uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + architecture: x64 + cache: 'pip' + cache-dependency-path: | + requirements.txt + application/records/requirements.txt + application/records/requirements-test.txt + + - name: Install utilities + run: | + pip install -r requirements.txt + + - name: Validate application/records + run: | + ngr test --accept-no-venv application/records diff --git a/.github/workflows/framework-records.yml b/.github/workflows/framework-records.yml new file mode 100644 index 00000000..4576eb5c --- /dev/null +++ b/.github/workflows/framework-records.yml @@ -0,0 +1,74 @@ +name: records (framework) + +on: + pull_request: + branches: ~ + paths: + - '.github/workflows/framework-records.yml' + - 'framework/records/**' + - '/requirements.txt' + push: + branches: [ main ] + paths: + - '.github/workflows/framework-records.yml' + - 'framework/records/**' + - '/requirements.txt' + + # Allow job to be triggered manually. + workflow_dispatch: + + # Run job each night after CrateDB nightly has been published. + schedule: + - cron: '0 3 * * *' + +# Cancel in-progress jobs when pushing to the same branch.
+concurrency: + cancel-in-progress: true + group: ${{ github.workflow }}-${{ github.ref }} + +jobs: + test: + name: " + Python: ${{ matrix.python-version }} + CrateDB: ${{ matrix.cratedb-version }} + on ${{ matrix.os }}" + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: [ 'ubuntu-latest' ] + python-version: [ '3.9', '3.13' ] + cratedb-version: [ 'nightly' ] + + services: + cratedb: + image: crate/crate:${{ matrix.cratedb-version }} + ports: + - 4200:4200 + - 5432:5432 + env: + CRATE_HEAP_SIZE: 4g + + steps: + + - name: Acquire sources + uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + architecture: x64 + cache: 'pip' + cache-dependency-path: | + requirements.txt + framework/records/requirements.txt + framework/records/requirements-test.txt + + - name: Install utilities + run: | + pip install -r requirements.txt + + - name: Validate framework/records + run: | + ngr test --accept-no-venv framework/records diff --git a/application/records/README.md b/application/records/README.md new file mode 100644 index 00000000..25f77f9d --- /dev/null +++ b/application/records/README.md @@ -0,0 +1,57 @@ +# Verify the `records` program with CrateDB + +Records: SQL for Humans™ + +## About + +This folder includes software integration tests for verifying +that the [Records] Python program works well together with [CrateDB]. + +Records is a very simple, but powerful, library for making raw SQL +queries to most relational databases. It uses [SQLAlchemy]. + +Records is intended for report-style exports of database queries, and +has not yet been optimized for extremely large data dumps. + +## What's Inside + +- `example.sh`: A few examples that read CrateDB's `sys.summits` table + using the `records` program. A single example that inserts data into + a column of CrateDB's `OBJECT` type. + +## Install + +Set up sandbox and install packages.
+```bash +pip install uv +uv venv .venv +source .venv/bin/activate +uv pip install -r requirements.txt -r requirements-test.txt +``` + +## Synopsis +Install packages. +```shell +pip install --upgrade records sqlalchemy-cratedb +``` +Define database connection URL, suitable for CrateDB on localhost. +For CrateDB Cloud, use `crate://<username>:<password>@<host>`. +```shell +export DATABASE_URL="crate://" +``` +Invoke query. +```shell +records "SELECT * FROM sys.summits WHERE region ILIKE :region" region="ortler%" +``` + +## Tests + +Run integration tests. +```bash +pytest +``` + + +[CrateDB]: https://cratedb.com/database +[Records]: https://pypi.org/project/records/ +[SQLAlchemy]: https://www.sqlalchemy.org/ diff --git a/application/records/example.sh b/application/records/example.sh new file mode 100644 index 00000000..206db63e --- /dev/null +++ b/application/records/example.sh @@ -0,0 +1,34 @@ +#!/usr/bin/env sh + +# Using `records` with CrateDB: Basic usage. +# +# pip install --upgrade records sqlalchemy-cratedb +# +# A few basic operations using the `records` program with CrateDB. +# +# - https://pypi.org/project/records/ + +# Define database connection URL, suitable for CrateDB on localhost. +# For CrateDB Cloud, use `crate://<username>:<password>@<host>`. +export DATABASE_URL="crate://" + +# Basic query, tabular output. +records "SELECT * FROM sys.summits ORDER BY height DESC LIMIT 3" + +# Query with parameters. +records "SELECT * FROM sys.summits WHERE region ILIKE :region" region="ortler%" + +# Export data. Files are written to `$TMPDIR`, falling back to `/tmp` when it is unset. +# Supported formats: csv tsv json yaml html xls xlsx dbf latex ods +records "SELECT * FROM sys.summits ORDER BY height DESC LIMIT 3" csv +records "SELECT * FROM sys.summits ORDER BY height DESC LIMIT 3" json +records "SELECT * FROM sys.summits LIMIT 42" html > "${TMPDIR:-/tmp}/sys_summits.html" +records "SELECT * FROM sys.summits LIMIT 42" ods > "${TMPDIR:-/tmp}/sys_summits.ods" +records "SELECT * FROM sys.summits LIMIT 42" xlsx > "${TMPDIR:-/tmp}/sys_summits.xlsx" + +# Insert data.
+records "DROP TABLE IF EXISTS testdrive.example" +records "CREATE TABLE testdrive.example (data OBJECT(DYNAMIC))" +records "INSERT INTO testdrive.example (data) VALUES (:data)" data='{"temperature": 42.42, "humidity": 84.84}' +records "REFRESH TABLE testdrive.example" +records "SELECT * FROM testdrive.example" diff --git a/application/records/pyproject.toml b/application/records/pyproject.toml new file mode 100644 index 00000000..dcfa0c50 --- /dev/null +++ b/application/records/pyproject.toml @@ -0,0 +1,23 @@ +[tool.pytest.ini_options] +minversion = "2.0" +addopts = """ + -rfEX -p pytester --strict-markers --verbosity=3 + --capture=no + """ +log_level = "DEBUG" +log_cli_level = "DEBUG" +testpaths = ["*.py"] +xfail_strict = true +markers = [ +] + + +[tool.coverage.run] +branch = false +omit = [ + "test*", +] + +[tool.coverage.report] +fail_under = 0 +show_missing = true diff --git a/application/records/requirements-test.txt b/application/records/requirements-test.txt new file mode 100644 index 00000000..508a3d0d --- /dev/null +++ b/application/records/requirements-test.txt @@ -0,0 +1 @@ +pytest<9 diff --git a/application/records/requirements.txt b/application/records/requirements.txt new file mode 100644 index 00000000..7bfcd8a6 --- /dev/null +++ b/application/records/requirements.txt @@ -0,0 +1,3 @@ +records<0.7 +sqlalchemy-cratedb<0.41 +tablib[ods] diff --git a/application/records/test.py b/application/records/test.py new file mode 100644 index 00000000..b793b576 --- /dev/null +++ b/application/records/test.py @@ -0,0 +1,11 @@ +import shlex +import subprocess +import pytest + + +def run(command: str): + subprocess.check_call(shlex.split(command)) + + +def test_example(): + run("sh example.sh") diff --git a/framework/records/README.md b/framework/records/README.md new file mode 100644 index 00000000..66a5662f --- /dev/null +++ b/framework/records/README.md @@ -0,0 +1,62 @@ +# Verify the `records` library with CrateDB + +Records: SQL for Humans™ + +## About + +This 
folder includes software integration tests for verifying +that the [Records] Python library works well together with [CrateDB]. + +Records is a very simple, but powerful, library for making raw SQL +queries to most relational databases. It uses [SQLAlchemy]. + +Records is intended for report-style exports of database queries, and +has not yet been optimized for extremely large data dumps. + +## What's Inside + +- `example_basic.py`: A few examples that read CrateDB's `sys.summits` table. + An example that lists the names of existing tables. + +- `example_types.py`: An example that exercises all data types supported by + CrateDB. + +## Install + +Set up sandbox and install packages. +```bash +pip install uv +uv venv .venv +source .venv/bin/activate +uv pip install -r requirements.txt -r requirements-test.txt +``` + +## Synopsis +```shell +pip install --upgrade records sqlalchemy-cratedb +``` +```python +from pprint import pprint +import records + +# Define database connection URL, suitable for CrateDB on localhost. +# For CrateDB Cloud, use `crate://<username>:<password>@<host>`. +db = records.Database("crate://") + +# Invoke query. +rows = db.query("SELECT * FROM sys.summits ORDER BY height DESC LIMIT 3") +data = rows.all() +pprint(data) +``` + +## Tests + +Run integration tests. +```bash +pytest +``` + + +[CrateDB]: https://cratedb.com/database +[Records]: https://pypi.org/project/records/ +[SQLAlchemy]: https://www.sqlalchemy.org/ diff --git a/framework/records/example_basic.py b/framework/records/example_basic.py new file mode 100644 index 00000000..4cfab056 --- /dev/null +++ b/framework/records/example_basic.py @@ -0,0 +1,58 @@ +""" +Using `records` with CrateDB: Basic usage. + + pip install --upgrade records sqlalchemy-cratedb + +A few basic operations using the `records` library with CrateDB. + +- https://pypi.org/project/records/ +""" + +import records + + +def records_select_sys_summits(): + """ + Query CrateDB's built-in `sys.summits` table.
+ :return: The three highest summits, as returned by `rows.all()`. + """ + db = records.Database("crate://", echo=True) + rows = db.query("SELECT * FROM sys.summits ORDER BY height DESC LIMIT 3") + data = rows.all() + return data + + +def records_export_sys_summits_pandas(): + """ + Query CrateDB's built-in `sys.summits` table, returning a pandas dataframe. + """ + db = records.Database("crate://", echo=True) + rows = db.query("SELECT * FROM sys.summits ORDER BY height DESC LIMIT 3") + data = rows.export("df") + return data + + +def records_export_sys_summits_csv(): + """ + Query CrateDB's built-in `sys.summits` table, returning CSV. + """ + db = records.Database("crate://", echo=True) + rows = db.query("SELECT * FROM sys.summits ORDER BY height DESC LIMIT 3") + data = rows.export("csv") + return data + + +def records_get_table_names(): + """ + Inquire table names of the system schema `sys`. + """ + db = records.Database("crate://?schema=sys", echo=True) + table_names = db.get_table_names() + return table_names + + +if __name__ == "__main__": + print(records_select_sys_summits()) + print(records_export_sys_summits_pandas()) + print(records_export_sys_summits_csv()) + print(records_get_table_names()) diff --git a/framework/records/example_types.py b/framework/records/example_types.py new file mode 100644 index 00000000..e57db0ba --- /dev/null +++ b/framework/records/example_types.py @@ -0,0 +1,148 @@ +""" +Using `records` with CrateDB: All data types. + + pip install --upgrade records sqlalchemy-cratedb + +An end-to-end lifecycle, defining a table, inserting data, and querying it. +This example uses all data types supported by CrateDB. + +- https://pypi.org/project/records/ +- https://cratedb.com/docs/crate/reference/en/latest/general/ddl/data-types.html#supported-types +""" + +from copy import deepcopy + +import pytest +import records + + +# The record that is inserted into the database.
+RECORD_IN = dict( + null_integer=None, + integer=42, + bigint=42, + float=42.42, + double=42.42, + decimal=42.42, + bit="01010101", + bool=True, + text="foobar", + char="foo", + timestamp_tz="1970-01-02T00:00:00+01:00", + timestamp_notz="1970-01-02T00:00:00", + ip="127.0.0.1", + array=["foo", "bar"], + object={"for": "bar"}, + geopoint=[85.43, 66.23], + geoshape="POLYGON ((5 5, 10 5, 10 10, 5 10, 5 5))", + float_vector=[1.0, 2.0, 3.0], +) + +# When querying it, a few values will be canonicalized. +RECORD_OUT = deepcopy(RECORD_IN) +RECORD_OUT.update( + dict( + bit="B'01010101'", + char="foo ", + timestamp_tz=82800000, + timestamp_notz=86400000, + geopoint=[pytest.approx(85.43), pytest.approx(66.23)], + geoshape={ + "coordinates": [ + [[5.0, 5.0], [5.0, 10.0], [10.0, 10.0], [10.0, 5.0], [5.0, 5.0]] + ], + "type": "Polygon", + }, + ) +) + + +def records_ddl_dml_dql(): + """ + Validate all types of CrateDB. + + https://cratedb.com/docs/crate/reference/en/latest/general/ddl/data-types.html#supported-types + """ + db = records.Database("crate://", echo=True) + + # DDL + db.query("DROP TABLE IF EXISTS testdrive.example;") + db.query(""" + CREATE TABLE testdrive.example ( + -- Numeric types + null_integer INT, + integer INT, + bigint BIGINT, + float FLOAT, + double DOUBLE, + decimal DECIMAL(8, 2), + -- Other scalar types + bit BIT(8), + bool BOOLEAN, + text TEXT, + char CHARACTER(5), + timestamp_tz TIMESTAMP WITH TIME ZONE, + timestamp_notz TIMESTAMP WITHOUT TIME ZONE, + ip IP, + -- Container types + "array" ARRAY(STRING), + "object" OBJECT(DYNAMIC), + -- Geospatial types + geopoint GEO_POINT, + geoshape GEO_SHAPE, + -- Vector type + "float_vector" FLOAT_VECTOR(3) + ); + """) + + # DML + db.query( + """ + INSERT INTO testdrive.example ( + null_integer, + integer, + bigint, + float, + double, + decimal, + bit, + bool, + text, + char, + timestamp_tz, + timestamp_notz, + ip, + "array", + "object", + geopoint, + geoshape, + float_vector + ) VALUES ( + :null_integer, + 
:integer, + :bigint, + :float, + :double, + :decimal, + :bit, + :bool, + :text, + :char, + :timestamp_tz, + :timestamp_notz, + :ip, + :array, + :object, + :geopoint, + :geoshape, + :float_vector + ); + """, + **RECORD_IN, + ) + + # DQL + db.query("REFRESH TABLE testdrive.example") + rows = db.query("SELECT * FROM testdrive.example") + data = rows.all() + return data diff --git a/framework/records/pyproject.toml b/framework/records/pyproject.toml new file mode 100644 index 00000000..b7997815 --- /dev/null +++ b/framework/records/pyproject.toml @@ -0,0 +1,12 @@ +[tool.pytest.ini_options] +minversion = "2.0" +addopts = """ + -rfEXs -p pytester --strict-markers --verbosity=3 + --capture=no + """ +log_level = "DEBUG" +log_cli_level = "DEBUG" +testpaths = ["*.py"] +xfail_strict = true +markers = [ +] diff --git a/framework/records/requirements-test.txt b/framework/records/requirements-test.txt new file mode 100644 index 00000000..508a3d0d --- /dev/null +++ b/framework/records/requirements-test.txt @@ -0,0 +1 @@ +pytest<9 diff --git a/framework/records/requirements.txt b/framework/records/requirements.txt new file mode 100644 index 00000000..b7ab3b8f --- /dev/null +++ b/framework/records/requirements.txt @@ -0,0 +1,3 @@ +records<0.7 +sqlalchemy-cratedb<0.41 +tablib[pandas] diff --git a/framework/records/test.py b/framework/records/test.py new file mode 100644 index 00000000..ccefb9cc --- /dev/null +++ b/framework/records/test.py @@ -0,0 +1,42 @@ +from example_basic import ( + records_select_sys_summits, + records_get_table_names, + records_export_sys_summits_csv, + records_export_sys_summits_pandas, +) +from example_types import records_ddl_dml_dql, RECORD_OUT + + +def test_sys_summits(): + """ + Read built-in data from CrateDB's `sys.summits` table through `records`.
+ """ + data = records_select_sys_summits() + assert data[0]["mountain"] == "Mont Blanc" + + +def test_get_table_names(): + data = records_get_table_names() + assert "nodes" in data + assert "shards" in data + assert len(data) > 10 + + +def test_export_sys_summits_pandas(): + data = records_export_sys_summits_pandas() + assert list(data["mountain"]) == ["Mont Blanc", "Monte Rosa", "Dom"] + + +def test_export_sys_summits_csv(): + data = records_export_sys_summits_csv() + assert "classification,coordinates,country" in data + assert "Mont Blanc,4695,U-Savoy/Aosta" in data + + +def test_ddl_dml_dql(): + """ + Validate an end-to-end lifecycle, defining a table, inserting data, and querying it. + This example uses all data types supported by CrateDB. + """ + data = records_ddl_dml_dql() + assert data[0].as_dict() == RECORD_OUT