From b2a01867e78f2c6a28db24023510e192d0ddcdcb Mon Sep 17 00:00:00 2001 From: Andreas Motl Date: Sun, 7 Jan 2024 02:00:26 +0100 Subject: [PATCH] Apache Superset: Verify connectivity to CrateDB Add a few basic integration tests having conversations with the Apache Superset UI and HTTP API. --- .github/workflows/apache-superset.yml | 75 ++++++++++ .gitignore | 3 +- framework/apache-superset/README.md | 58 ++++++++ framework/apache-superset/conftest.py | 131 ++++++++++++++++++ framework/apache-superset/data.sql | 36 +++++ framework/apache-superset/pyproject.toml | 11 ++ .../apache-superset/requirements-test.txt | 3 + framework/apache-superset/requirements.txt | 3 + framework/apache-superset/superset_config.py | 32 +++++ framework/apache-superset/test.py | 91 ++++++++++++ framework/apache-superset/util.py | 13 ++ 11 files changed, 455 insertions(+), 1 deletion(-) create mode 100644 .github/workflows/apache-superset.yml create mode 100644 framework/apache-superset/README.md create mode 100644 framework/apache-superset/conftest.py create mode 100644 framework/apache-superset/data.sql create mode 100644 framework/apache-superset/pyproject.toml create mode 100644 framework/apache-superset/requirements-test.txt create mode 100644 framework/apache-superset/requirements.txt create mode 100644 framework/apache-superset/superset_config.py create mode 100644 framework/apache-superset/test.py create mode 100644 framework/apache-superset/util.py diff --git a/.github/workflows/apache-superset.yml b/.github/workflows/apache-superset.yml new file mode 100644 index 00000000..191e289c --- /dev/null +++ b/.github/workflows/apache-superset.yml @@ -0,0 +1,75 @@ +name: Apache Superset + +on: + pull_request: + branches: ~ + paths: + - '.github/workflows/apache-superset.yml' + - 'framework/apache-superset/**' + - 'requirements.txt' + push: + branches: [ main ] + paths: + - '.github/workflows/apache-superset.yml' + - 'framework/apache-superset/**' + - 'requirements.txt' + + # Allow job to be 
triggered manually. + workflow_dispatch: + + # Run job each night after CrateDB nightly has been published. + schedule: + - cron: '0 3 * * *' + +# Cancel in-progress jobs when pushing to the same branch. +concurrency: + cancel-in-progress: true + group: ${{ github.workflow }}-${{ github.ref }} + +jobs: + + tests: + runs-on: ${{ matrix.os }} + + strategy: + fail-fast: false + matrix: + os: [ ubuntu-22.04 ] + superset-version: [ "2.*" ] + python-version: [ "3.11" ] + + services: + cratedb: + image: crate/crate:nightly + ports: + - 4200:4200 + - 5432:5432 + + name: Superset ${{ matrix.superset-version }}, Python ${{ matrix.python-version }} + steps: + + - name: Acquire sources + uses: actions/checkout@v4 + + - name: Setup Python + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + architecture: x64 + cache: "pip" + cache-dependency-path: | + pyproject.toml + requirements.txt + requirements-test.txt + + - name: Install utilities + run: | + pip install -r requirements.txt + + - name: Install Apache Superset ${{ matrix.superset-version }} + run: | + pip install 'apache-superset==${{ matrix.superset-version }}' + + - name: Validate framework/apache-superset + run: | + ngr test --accept-no-venv framework/apache-superset diff --git a/.gitignore b/.gitignore index 44639d77..d75463fe 100644 --- a/.gitignore +++ b/.gitignore @@ -2,7 +2,8 @@ .venv* __pycache__ .coverage +.DS_Store coverage.xml mlruns/ archive/ -logs.log \ No newline at end of file +logs.log diff --git a/framework/apache-superset/README.md b/framework/apache-superset/README.md new file mode 100644 index 00000000..d15ab848 --- /dev/null +++ b/framework/apache-superset/README.md @@ -0,0 +1,58 @@ +# Verify Apache Superset with CrateDB + +## About + +This folder includes software integration tests for verifying +that Apache Superset works well together with CrateDB. + +## Setup + +You can also exercise the configuration and setup steps manually. + +Start CrateDB. 
+```bash +docker run --rm -it --name=cratedb \ + --publish=4200:4200 --publish=5432:5432 \ + --env=CRATE_HEAP_SIZE=4g crate:latest -Cdiscovery.type=single-node +``` + +Setup sandbox and install packages. +```bash +python3 -m venv .venv +source .venv/bin/activate +pip install -r requirements.txt +``` + +Configure and initialize Apache Superset. +```bash +export FLASK_APP=superset +export SUPERSET_CONFIG_PATH=superset_config.py +superset db upgrade +superset fab create-admin --username=admin --password=admin --firstname=admin --lastname=admin --email=admin@example.org +superset init +``` + +Run Superset server. +```bash +superset run -p 8088 --with-threads +open http://127.0.0.1:8088/ +``` + +## API Usage + +```bash +# Authenticate and acquire a JWT token. +AUTH_TOKEN=$(http http://localhost:8088/api/v1/security/login username=admin password=admin provider=db | jq -r .access_token) + +# Create a data source item / database connection. +http http://localhost:8088/api/v1/database/ database_name="CrateDB Testdrive" engine=crate sqlalchemy_uri=crate://crate@localhost:4200 Authorization:"Bearer ${AUTH_TOKEN}" +``` + +```bash +# Create datasets and probe them. 
+crash < data.sql +http http://127.0.0.1:8088/api/v1/dataset/ Authorization:"Bearer ${AUTH_TOKEN}" database=1 schema=doc table_name=devices_info +http http://127.0.0.1:8088/api/v1/dataset/ Authorization:"Bearer ${AUTH_TOKEN}" database=1 schema=doc table_name=devices_readings +cat probe-1.json | http http://127.0.0.1:8088/api/v1/chart/data Authorization:"Bearer ${AUTH_TOKEN}" +cat probe-2.json | http http://127.0.0.1:8088/api/v1/chart/data Authorization:"Bearer ${AUTH_TOKEN}" +``` diff --git a/framework/apache-superset/conftest.py b/framework/apache-superset/conftest.py new file mode 100644 index 00000000..da9d4e98 --- /dev/null +++ b/framework/apache-superset/conftest.py @@ -0,0 +1,131 @@ +import os +import shlex +import shutil +import subprocess +import time + +import pytest +import requests + +from util import get_auth_headers + + +superset_env = { + "FLASK_APP": "superset", + "SUPERSET_CONFIG_PATH": "superset_config.py", +} +superset_bin = shutil.which("superset") + + +uri_database = "http://localhost:8088/api/v1/database/" + + +# Utility functions. + +def invoke_superset(command: str): + """ + Invoke a `superset` CLI subcommand to completion; raises `CalledProcessError` on a non-zero exit. + """ + command = f"{superset_bin} {command}" + subprocess.run(shlex.split(command), env=superset_env, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, check=True) + + +# Test suite fixtures. + +@pytest.fixture(scope="session") +def fix_greenlet(): + """ + Install more recent greenlet, because Playwright installs version 3.0.1, which breaks Superset. + """ + os.system("pip install --upgrade greenlet") + + +@pytest.fixture(scope="session") +def playwright_install_firefox(): + """ + Playwright needs a browser. + """ + os.system("playwright install firefox") + + +@pytest.fixture(scope="session") +def initialize_superset(): + """ + Run the Apache Superset setup procedure.
+ """ + invoke_superset("db upgrade") + invoke_superset("fab create-admin --username=admin --password=admin --firstname=admin --lastname=admin --email=admin@example.org") + invoke_superset("init") + + +@pytest.fixture(scope="session") +def reset_superset(start_superset): + """ + Reset database connections and datasets. + """ + resources_to_delete = [ + "http://localhost:8088/api/v1/dataset/1", + "http://localhost:8088/api/v1/dataset/2", + "http://localhost:8088/api/v1/database/1", + "http://localhost:8088/api/v1/database/2", + ] + for resource_to_delete in resources_to_delete: + response = requests.delete(resource_to_delete, headers=get_auth_headers()) + assert response.status_code in [200, 404], response.json() + + +@pytest.fixture(scope="session") +def start_superset(initialize_superset): + """ + Start the Apache Superset server. + """ + command = f"{superset_bin} run -p 8088 --with-threads" + daemon = subprocess.Popen(shlex.split(command), env=superset_env, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) + # Give the server time to start + time.sleep(4) + # `poll()` is `None` only while the process is alive; any exit code, including 0, means the server died + assert daemon.poll() is None, daemon.stdout.read().decode("utf-8") + yield daemon + # Shut it down at the end of the pytest session + daemon.terminate() + + +@pytest.fixture(scope="session") +def provision_superset(start_superset, reset_superset): + """ + Provision Superset by creating a database connection object for CrateDB. + """ + + # Create a data source item / database connection.
+ response = requests.post( + uri_database, + headers=get_auth_headers(), + json={"database_name": "CrateDB Testdrive", "engine": "crate", "sqlalchemy_uri": "crate://crate@localhost:4200"}, + ) + + assert response.status_code == 201 + assert response.json() == { + "id": 1, + "result": { + "configuration_method": "sqlalchemy_form", + "database_name": "CrateDB Testdrive", + "driver": "crate-python", + "expose_in_sqllab": True, + "sqlalchemy_uri": "crate://crate@localhost:4200", + }, + } + + +@pytest.fixture(scope="session", autouse=True) +def do_setup( + fix_greenlet, + playwright_install_firefox, + initialize_superset, + start_superset, + reset_superset, + provision_superset, +): + """ + Provide a fully configured and provisioned Apache Superset instance to the test suite. + """ + pass diff --git a/framework/apache-superset/data.sql b/framework/apache-superset/data.sql new file mode 100644 index 00000000..bdc2839b --- /dev/null +++ b/framework/apache-superset/data.sql @@ -0,0 +1,36 @@ +-- https://github.com/crate/cratedb-datasets + +CREATE TABLE IF NOT EXISTS devices_readings ( + "ts" TIMESTAMP WITH TIME ZONE, + "device_id" TEXT, + "battery" OBJECT(DYNAMIC) AS ( + "level" BIGINT, + "status" TEXT, + "temperature" DOUBLE PRECISION + ), + "cpu" OBJECT(DYNAMIC) AS ( + "avg_1min" DOUBLE PRECISION, + "avg_5min" DOUBLE PRECISION, + "avg_15min" DOUBLE PRECISION + ), + "memory" OBJECT(DYNAMIC) AS ( + "free" BIGINT, + "used" BIGINT + ) +); + +CREATE TABLE IF NOT EXISTS devices_info ( + "device_id" TEXT, + "api_version" TEXT, + "manufacturer" TEXT, + "model" TEXT, + "os_name" TEXT +); + +COPY "devices_readings" + FROM 'https://github.com/crate/cratedb-datasets/raw/main/cloud-tutorials/devices_readings.json.gz' + WITH (compression = 'gzip'); + +COPY "devices_info" + FROM 'https://github.com/crate/cratedb-datasets/raw/main/cloud-tutorials/devices_info.json.gz' + WITH (compression = 'gzip'); diff --git a/framework/apache-superset/pyproject.toml 
b/framework/apache-superset/pyproject.toml new file mode 100644 index 00000000..c66d60dd --- /dev/null +++ b/framework/apache-superset/pyproject.toml @@ -0,0 +1,11 @@ +[tool.pytest.ini_options] +minversion = "2.0" +addopts = """ + -rfEXs -p pytester --strict-markers --verbosity=3 + """ +log_level = "DEBUG" +log_cli_level = "DEBUG" +testpaths = ["*.py"] +xfail_strict = true +markers = [ +] diff --git a/framework/apache-superset/requirements-test.txt b/framework/apache-superset/requirements-test.txt new file mode 100644 index 00000000..c0798858 --- /dev/null +++ b/framework/apache-superset/requirements-test.txt @@ -0,0 +1,3 @@ +playwright<2 +pytest<8 +requests<3 diff --git a/framework/apache-superset/requirements.txt b/framework/apache-superset/requirements.txt new file mode 100644 index 00000000..ff1589d5 --- /dev/null +++ b/framework/apache-superset/requirements.txt @@ -0,0 +1,3 @@ +apache-superset==2.* +crate[sqlalchemy]==0.34.0 +marshmallow_enum<2 # Seems to be missing from `apache-superset`? diff --git a/framework/apache-superset/superset_config.py b/framework/apache-superset/superset_config.py new file mode 100644 index 00000000..24e16807 --- /dev/null +++ b/framework/apache-superset/superset_config.py @@ -0,0 +1,32 @@ +# Superset specific config +ROW_LIMIT = 5000 + +# Flask App Builder configuration +# Your App secret key will be used for securely signing the session cookie +# and encrypting sensitive information on the database +# Make sure you are changing this key for your deployment with a strong key. +# Alternatively you can set it with `SUPERSET_SECRET_KEY` environment variable. +# You MUST set this for production environments or the server will refuse +# to start and you will see an error in the logs accordingly.
+SECRET_KEY = 'VcKzHS4g2h+dP33tCbqOghtKaU37wvFECMhVqrfccaoI/17qh/j3+VDV' + +# The SQLAlchemy connection string to your database backend +# This connection defines the path to the database that stores your +# superset metadata (slices, connections, tables, dashboards, ...). +# Note that the connection information to connect to the datasources +# you want to explore are managed directly in the web UI +# The check_same_thread=false property ensures the sqlite client does not attempt +# to enforce single-threaded access, which may be problematic in some edge cases +# When not configured, the default location is `~/.superset/superset.db`. +# See also https://superset.apache.org/docs/installation/configuring-superset/. +# SQLALCHEMY_DATABASE_URI = 'sqlite:////path/to/superset.db?check_same_thread=false' + +# Flask-WTF flag for CSRF +WTF_CSRF_ENABLED = True +# Add endpoints that need to be exempt from CSRF protection +WTF_CSRF_EXEMPT_LIST = [] +# A CSRF token that expires in 1 year +WTF_CSRF_TIME_LIMIT = 60 * 60 * 24 * 365 + +# Set this API key to enable Mapbox visualizations +MAPBOX_API_KEY = '' diff --git a/framework/apache-superset/test.py b/framework/apache-superset/test.py new file mode 100644 index 00000000..7b55b426 --- /dev/null +++ b/framework/apache-superset/test.py @@ -0,0 +1,91 @@ +import requests +from playwright.sync_api import sync_playwright + +from util import get_auth_headers + + +uri_dataset = "http://127.0.0.1:8088/api/v1/dataset/" +uri_chart = "http://127.0.0.1:8088/api/v1/chart/data" + + +def test_api(): + """ + Create datasets and probe creating a graph using the Superset HTTP API. 
+ """ + requests.post( + uri_dataset, + headers=get_auth_headers(), + json={"database": 1, "schema": "sys", "table_name": "summits"}, + ).raise_for_status() + + graph_request = { + "datasource": { + "id": 1, + "type": "table" + }, + "queries": [ + { + "metrics": [ + "count" + ] + } + ], + "result_format": "json", + "result_type": "full", + } + response = requests.post(uri_chart, headers=get_auth_headers(), json=graph_request) + response.raise_for_status() + assert response.status_code == 200 + result = response.json()["result"][0] + assert result["data"] == [{'count': 1605}] + + +def test_ui(): + """ + Log in to Superset UI, navigate to SQL Lab, and exercise a query. + """ + uri_home = "http://127.0.0.1:8088/" + uri_sqllab = "http://127.0.0.1:8088/superset/sqllab" + + with sync_playwright() as p: + browser = p.firefox.launch( + headless=True, + firefox_user_prefs={ + "network.cookie.cookieBehavior": 4, + "security.insecure_field_warning.contextual.enabled": False, + "security.certerrors.permanentOverride": False, + "network.stricttransportsecurity.preloadlist": False, + "security.enterprise_roots.enabled": True, + "security.mixed_content.block_active_content": False, + }) + + # Navigate to Apache Superset. + page = browser.new_page() + page.goto(uri_home) + + # Run the login procedure. + assert page.text_content(".panel-title") == "Sign In" + assert page.url.endswith("/login/") + page.type("input#username", "admin") + page.type("input#password", "admin") + page.click("input[type=submit]") + + # Verify login was successful, and being navigated to `Home`. + html_title = page.text_content("title").strip() + assert html_title == "Superset" + assert page.url.endswith("/superset/welcome/") + assert page.text_content("h1") == "Home" + + # Invoke SQL Lab with an example query, and verify response. 
+ sql = "SELECT * FROM sys.summits LIMIT 42;" + page.goto(uri_sqllab) + page.wait_for_selector("#ace-editor") + page.evaluate(f"ace.edit('ace-editor').setValue('{sql}')") + page.get_by_role("button", name="Run").click() + page.wait_for_function("(document.querySelector('div.ant-tabs-content-holder') || {textContent: ''}).textContent.includes('rows returned')") + page_body = page.text_content("div.ant-tabs-content-holder") + assert "42 rows returned" in page_body + assert "Monte Rosa" in page_body + + # That's it. + browser.close() diff --git a/framework/apache-superset/util.py b/framework/apache-superset/util.py new file mode 100644 index 00000000..d3e5b7cc --- /dev/null +++ b/framework/apache-superset/util.py @@ -0,0 +1,13 @@ +import requests + +uri_login = "http://localhost:8088/api/v1/security/login" + + +def get_access_token(): + response = requests.post(uri_login, json={"username": "admin", "password": "admin", "provider": "db"}) + return response.json()["access_token"] + + +def get_auth_headers(): + access_token = get_access_token() + return {"Authorization": f"Bearer {access_token}"}