From 9e2fe2a904b27a3c584c8174be36f00d1a0b0680 Mon Sep 17 00:00:00 2001 From: Andreas Motl Date: Sat, 23 Nov 2024 17:09:22 +0100 Subject: [PATCH] Metabase: Add test harness for validating Metabase against CrateDB A basic test case that reads CrateDB's `sys.summits` table through Metabase, after connecting CrateDB as a PostgreSQL database. --- .github/dependabot.yml | 10 ++ .github/workflows/application-metabase.yml | 72 +++++++++ application/metabase/README.md | 34 +++++ application/metabase/backlog.md | 46 ++++++ application/metabase/docker-compose.yml | 51 +++++++ application/metabase/metabase_rig.py | 163 +++++++++++++++++++++ application/metabase/pyproject.toml | 14 ++ application/metabase/requirements-test.txt | 2 + application/metabase/requirements.txt | 1 + application/metabase/test.py | 35 +++++ 10 files changed, 428 insertions(+) create mode 100644 .github/workflows/application-metabase.yml create mode 100644 application/metabase/README.md create mode 100644 application/metabase/backlog.md create mode 100644 application/metabase/docker-compose.yml create mode 100644 application/metabase/metabase_rig.py create mode 100644 application/metabase/pyproject.toml create mode 100644 application/metabase/requirements-test.txt create mode 100644 application/metabase/requirements.txt create mode 100644 application/metabase/test.py diff --git a/.github/dependabot.yml b/.github/dependabot.yml index a95247d8..010dcd14 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -90,6 +90,16 @@ updates: schedule: interval: "daily" + - directory: "/application/metabase" + package-ecosystem: "pip" + schedule: + interval: "daily" + + - directory: "/application/metabase" + package-ecosystem: "docker" + schedule: + interval: "daily" + # Frameworks. 
- directory: "/framework/dbt/basic" diff --git a/.github/workflows/application-metabase.yml b/.github/workflows/application-metabase.yml new file mode 100644 index 00000000..f625a04d --- /dev/null +++ b/.github/workflows/application-metabase.yml @@ -0,0 +1,72 @@ +name: CrateDB Toolkit + +on: + pull_request: + branches: ~ + paths: + - '.github/workflows/application-metabase.yml' + - 'application/metabase/**' + - '/requirements.txt' + push: + branches: [ main ] + paths: + - '.github/workflows/application-metabase.yml' + - 'application/metabase/**' + - '/requirements.txt' + + # Allow job to be triggered manually. + workflow_dispatch: + + # Run job each night after CrateDB nightly has been published. + schedule: + - cron: '0 3 * * *' + +# Cancel in-progress jobs when pushing to the same branch. +concurrency: + cancel-in-progress: true + group: ${{ github.workflow }}-${{ github.ref }} + +jobs: + + test: + name: " + Python: ${{ matrix.python-version }} + CrateDB: ${{ matrix.cratedb-version }} + on ${{ matrix.os }}" + runs-on: ${{ matrix.os }} + + strategy: + fail-fast: false + matrix: + os: [ "ubuntu-22.04" ] + python-version: [ "3.12" ] + cratedb-version: [ "nightly" ] + + steps: + + - name: Acquire sources + uses: actions/checkout@v4 + + - name: Setup Python + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + architecture: x64 + cache: "pip" + cache-dependency-path: | + pyproject.toml + requirements.txt + requirements-test.txt + + - name: Install uv + uses: astral-sh/setup-uv@v3 + with: + version: "latest" + + - name: Install utilities + run: | + uv pip install -r requirements.txt + + - name: Validate application/metabase + run: | + ngr test --accept-no-venv application/metabase diff --git a/application/metabase/README.md b/application/metabase/README.md new file mode 100644 index 00000000..9ad655dd --- /dev/null +++ b/application/metabase/README.md @@ -0,0 +1,34 @@ +# Verify Metabase with CrateDB + +## About + +This folder includes 
software integration tests for verifying +that Metabase works well together with CrateDB. +The test harness is based on Docker Compose. + +## What's Inside + +A basic test case that reads CrateDB's `sys.summits` table through +Metabase, after connecting CrateDB as a PostgreSQL database. + +## Setup + +Set up a sandbox and install packages. +```bash +pip install uv +uv venv .venv +source .venv/bin/activate +uv pip install -r requirements.txt -r requirements-test.txt +``` + +## Usage + +Run integration tests. +```bash +pytest +``` + +Watch service logs. +```shell +docker compose logs -f +``` diff --git a/application/metabase/backlog.md b/application/metabase/backlog.md new file mode 100644 index 00000000..eda1a312 --- /dev/null +++ b/application/metabase/backlog.md @@ -0,0 +1,46 @@ +# CrateDB <-> Metabase backlog + + +## metabase/metabase:v0.45.4.3 + +``` +2024-11-22 23:22:07,139 ERROR driver.util :: Failed to connect to Database +org.postgresql.util.PSQLException: The server does not support SSL. +``` + +``` +2024-11-22 23:22:07,290 WARN metabase.email :: Failed to send email +clojure.lang.ExceptionInfo: SMTP host is not set. {:cause :smtp-host-not-set} +``` + +``` +2024-11-22 23:22:08,189 WARN sync.util :: Error running step 'sync-timezone' for postgres Database 2 'cratedb-testdrive' +java.lang.Exception: Unable to parse date string '2024-11-22 23:22:08.175 ' for database engine 'postgres' +``` + +``` +2024-11-22 23:22:08,724 WARN sync.describe-table :: Don't know how to map column type '_int4' to a Field base_type, falling back to :type/*. +2024-11-22 23:22:08,724 WARN sync.describe-table :: Don't know how to map column type '_int4' to a Field base_type, falling back to :type/*. +2024-11-22 23:22:08,725 WARN sync.describe-table :: Don't know how to map column type 'regclass' to a Field base_type, falling back to :type/*. +2024-11-22 23:22:08,725 WARN sync.describe-table :: Don't know how to map column type '_int4' to a Field base_type, falling back to :type/*. 
+2024-11-22 23:22:08,726 WARN sync.describe-table :: Don't know how to map column type '_int2' to a Field base_type, falling back to :type/*. +... +``` + +``` +2024-11-22 23:22:13,900 WARN sync.util :: Error fingerprinting Table 12 'sys.jobs' +clojure.lang.ExceptionInfo: Error executing query: ERROR: line 2:359: no viable alternative at input 'SELECT "source"."substring531" AS "substring531", "source"."substring532" AS "substring532", "source"."substring533" AS "substring533", "source"."started" AS "started", "source"."substring534" AS "substring534", "source"."substring535" AS "substring535", "source"."substring536" AS "substring536" FROM (SELECT "sys"."jobs"."id" AS "id", ("sys"."jobs"."node"#>' +``` + +``` +2024-11-22 23:22:14,390 WARN sync.util :: Error fingerprinting Table 13 'sys.nodes' +clojure.lang.ExceptionInfo: Error executing query: ERROR: line 2:97: no viable alternative at input 'SELECT "source"."load['probe_timestamp']" AS "load['probe_timestamp']", ("source"."fs['total']"#>' +``` + +``` +2024-11-22 23:22:23,588 ERROR models.field-values :: Error fetching field values +clojure.lang.ExceptionInfo: Error executing query: ERROR: Cannot ORDER BY 'conffeqop': invalid data type 'integer_array'. + +2024-11-22 23:22:23,599 ERROR models.field-values :: Error fetching field values +clojure.lang.ExceptionInfo: Error executing query: ERROR: Cannot ORDER BY 'conkey': invalid data type 'smallint_array'. 
+``` diff --git a/application/metabase/docker-compose.yml b/application/metabase/docker-compose.yml new file mode 100644 index 00000000..f55b31dd --- /dev/null +++ b/application/metabase/docker-compose.yml @@ -0,0 +1,51 @@ +networks: + metanet-demo: + driver: bridge + +services: + + # Metabase + # https://www.metabase.com/docs/latest/installation-and-operation/running-metabase-on-docker#example-docker-compose-yaml-file + metabase: + image: metabase/metabase:v0.45.4.3 + container_name: metabase + hostname: metabase + volumes: + - /dev/urandom:/dev/random:ro + ports: + - 3000:3000 + networks: + - metanet-demo + healthcheck: + test: curl --fail -I http://localhost:3000/api/health || exit 1 + interval: 15s + timeout: 5s + retries: 5 + + # CrateDB + # https://github.com/crate/crate + cratedb: + image: crate/crate:nightly + container_name: cratedb + hostname: cratedb + ports: + - 4200:4200 + - 5432:5432 + networks: + - metanet-demo + healthcheck: + # https://github.com/crate/docker-crate/pull/151/files + test: curl --max-time 25 http://localhost:4200 || exit 1 + interval: 30s + timeout: 30s + + # Wait for all defined services to be fully available by probing their health + # status, even when using `docker compose up --detach`. + # https://marcopeg.com/2019/docker-compose-healthcheck/ + wait: + image: dadarek/wait-for-dependencies + depends_on: + metabase: + condition: service_healthy + cratedb: + condition: service_healthy diff --git a/application/metabase/metabase_rig.py b/application/metabase/metabase_rig.py new file mode 100644 index 00000000..3b849c63 --- /dev/null +++ b/application/metabase/metabase_rig.py @@ -0,0 +1,163 @@ +import time +from functools import lru_cache + +import requests +from metabase_api import Metabase_API + + +class MetabaseRig: + """ + Support end-to-end testing of CrateDB and Metabase. 
+ + https://www.metabase.com/docs/latest/api-documentation + + Authenticate your requests with a session token + https://www.metabase.com/learn/metabase-basics/administration/administration-and-operation/metabase-api#authenticate-your-requests-with-a-session-token + """ + def __init__(self, url: str): + self.username = "foobar@example.org" + self.password = "123456metabase" + self.mb = None + + self.url = url + self.api_url = f"{url.rstrip('/')}/api" + self.session = requests.Session() + self.session_token = None + + def get_setup_token(self) -> str: + response = self.session.get(f"{self.api_url}/session/properties") + return response.json()["setup-token"] + + def setup(self): + """ + Run Metabase setup, create admin user, and return a session ID. + + https://www.metabase.com/docs/latest/api/setup#post-apisetup + https://discourse.metabase.com/t/rest-api-for-initial-setup-process/3419 + """ + response = self.session.post(f"{self.api_url}/setup", json={ + "prefs": { + "allow_tracking": "false", + "site_locale": "en", + "site_name": "Hotzenplotz", + }, + "user": { + "password": self.password, + "password_confirm": self.password, + "email": self.username, + }, + "token": self.get_setup_token(), + }) + self.session_token = response.json()["id"] + + def login(self): + self.session.post(f"{self.api_url}/session", json={ + "username": self.username, + "password": self.password, + }) + self.mb = Metabase_API(self.url, self.username, self.password) + + def get_databases(self): + return self.session.get(f"{self.api_url}/database").json() + + def database(self, name: str) -> "MetabaseDatabase": + return MetabaseDatabase(rig=self, name=name) + + +class MetabaseDatabase: + def __init__(self, rig: MetabaseRig, name: str): + self.rig = rig + self.name = name + self.timeout = 15 + + @property + @lru_cache(maxsize=None) + def id(self): + return self.rig.mb.get_item_id("database", self.name) + + def create(self): + """ + 
https://www.metabase.com/docs/latest/api/database#post-apidatabase + """ + self.rig.session.post( + f"{self.rig.api_url}/database", + json={ + "engine": "postgres", + "name": self.name, + "details": { + "host": "cratedb", + "port": 5432, + "user": "crate", + }, + }, + ) + + def exists(self): + try: + response = self.rig.session.get(f"{self.rig.api_url}/database/{self.id}") + return response.status_code == 200 + except ValueError as ex: + if "There is no DB with the name" not in str(ex): + raise + return False + + def schema(self, name: str): + response = self.rig.session.get(f"{self.rig.api_url}/database/{self.id}/schema/{name}") + response.raise_for_status() + return response.json() + + def table_names(self, schema_name: str): + names = [] + for item in self.schema(name=schema_name): + names.append(f"{item['schema']}.{item['name']}") + return names + + def table_id_by_name(self, name: str): + return self.rig.mb.get_item_id("table", name) + + def query(self, table: str): + response = self.rig.session.post( + f"{self.rig.api_url}/dataset", + json={ + "database": self.id, + "query": { + "source-table": self.table_id_by_name(table), + }, + "type": "query", + "parameters": [], + } + ) + return response.json() + + def wait_database(self): + def condition(): + return self.exists() + return self._wait(condition, f"Database not found: {self.name}") + + def wait_schema(self, name: str): + def condition(): + try: + if schema := self.schema(name): + return schema + except requests.RequestException: + pass + return False + return self._wait(condition, f"Database schema '{name}' not found in database '{self.name}'") + + def wait_table(self, schema: str, name: str): + def condition(): + if schema_info := self.wait_schema(schema): + for item in schema_info: + if item["name"] == name and item["initial_sync_status"] == "complete": + return True + return self._wait(condition, f"Table not found: {schema}.{name}") + + def _wait(self, condition, timeout_message): + timeout = 
self.timeout + while True: + if result := condition(): + return result + if timeout == 0: + raise TimeoutError(timeout_message) + timeout -= 1 + time.sleep(1) diff --git a/application/metabase/pyproject.toml b/application/metabase/pyproject.toml new file mode 100644 index 00000000..d8811ca2 --- /dev/null +++ b/application/metabase/pyproject.toml @@ -0,0 +1,14 @@ +[tool.pytest.ini_options] +minversion = "2.0" +addopts = """ + -rfEXs -p pytester --strict-markers --verbosity=3 + --capture=no + --docker-compose-no-build + --use-running-containers + """ +log_level = "DEBUG" +log_cli_level = "DEBUG" +testpaths = ["*.py"] +xfail_strict = true +markers = [ +] diff --git a/application/metabase/requirements-test.txt b/application/metabase/requirements-test.txt new file mode 100644 index 00000000..1777869d --- /dev/null +++ b/application/metabase/requirements-test.txt @@ -0,0 +1,2 @@ +pytest<9 +pytest-docker-compose-v2<0.2 diff --git a/application/metabase/requirements.txt b/application/metabase/requirements.txt new file mode 100644 index 00000000..5579ba96 --- /dev/null +++ b/application/metabase/requirements.txt @@ -0,0 +1 @@ +metabase-api<3.5 diff --git a/application/metabase/test.py b/application/metabase/test.py new file mode 100644 index 00000000..d23f389c --- /dev/null +++ b/application/metabase/test.py @@ -0,0 +1,35 @@ +from metabase_rig import MetabaseRig + +pytest_plugins = ["docker_compose"] + + +def test_api_sys_summits(session_scoped_container_getter): + """ + End-to-end test reading data from CrateDB through Metabase. + """ + rig = MetabaseRig("http://localhost:3000/") + + # Login to and optionally provision Metabase. + try: + rig.login() + except: + rig.setup() + rig.login() + + # Acquire a database handle, optionally creating a database. + db = rig.database("cratedb-testdrive") + if not db.exists(): + db.create() + + # Wait for the `sys` schema to become available. + db.wait_schema("sys") + + # Validate a table exists in the `sys` schema. 
+ assert "sys.summits" in db.table_names(schema_name="sys") + + # Wait for the `sys.summits` table to become available. + db.wait_table("sys", "summits") + + # Query the `sys.summits` table. + data = db.query("summits") + assert data["data"]["rows"][0][5] == "Mont Blanc"