diff --git a/.github/dependabot.yml b/.github/dependabot.yml
index a95247d8..010dcd14 100644
--- a/.github/dependabot.yml
+++ b/.github/dependabot.yml
@@ -90,6 +90,16 @@ updates:
     schedule:
       interval: "daily"
 
+  - directory: "/application/metabase"
+    package-ecosystem: "pip"
+    schedule:
+      interval: "daily"
+
+  - directory: "/application/metabase"
+    package-ecosystem: "docker"
+    schedule:
+      interval: "daily"
+
   # Frameworks.
 
   - directory: "/framework/dbt/basic"
diff --git a/.github/workflows/application-metabase.yml b/.github/workflows/application-metabase.yml
new file mode 100644
index 00000000..59899daf
--- /dev/null
+++ b/.github/workflows/application-metabase.yml
@@ -0,0 +1,71 @@
+name: Metabase
+
+on:
+  pull_request:
+    branches: ~
+    paths:
+      - '.github/workflows/application-metabase.yml'
+      - 'application/metabase/**'
+      - 'requirements.txt'
+  push:
+    branches: [ main ]
+    paths:
+      - '.github/workflows/application-metabase.yml'
+      - 'application/metabase/**'
+      - 'requirements.txt'
+
+  # Allow job to be triggered manually.
+  workflow_dispatch:
+
+  # Run job each night after CrateDB nightly has been published.
+  schedule:
+    - cron: '0 3 * * *'
+
+# Cancel in-progress jobs when pushing to the same branch.
+concurrency:
+  cancel-in-progress: true
+  group: ${{ github.workflow }}-${{ github.ref }}
+
+jobs:
+
+  test:
+    name: "
+     CrateDB: ${{ matrix.cratedb-version }}
+     on ${{ matrix.os }}"
+    runs-on: ${{ matrix.os }}
+
+    strategy:
+      fail-fast: false
+      matrix:
+        os: [ "ubuntu-22.04" ]
+        python-version: [ "3.12" ]
+        cratedb-version: [ "nightly" ]
+
+    steps:
+
+      - name: Acquire sources
+        uses: actions/checkout@v4
+
+      - name: Setup Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: ${{ matrix.python-version }}
+          architecture: x64
+          cache: "pip"
+          cache-dependency-path: |
+            pyproject.toml
+            requirements.txt
+            requirements-test.txt
+
+      - name: Install uv
+        uses: astral-sh/setup-uv@v3
+        with:
+          version: "latest"
+
+      - name: Install utilities
+        run: |
+          uv pip install --system -r requirements.txt
+
+      - name: Validate application/metabase
+        run: |
+          ngr test --accept-no-venv application/metabase
diff --git a/application/metabase/README.md b/application/metabase/README.md
new file mode 100644
index 00000000..755e8053
--- /dev/null
+++ b/application/metabase/README.md
@@ -0,0 +1,38 @@
+# Verify Metabase with CrateDB
+
+## About
+
+This folder includes software integration tests for verifying
+that Metabase works well together with CrateDB.
+The test harness is based on Docker Compose.
+
+## What's Inside
+
+A basic test case that reads CrateDB's `sys.summits` table through
+Metabase, after connecting CrateDB as a PostgreSQL database.
+
+## Setup
+
+Set up a sandbox and install packages.
+```bash
+pip install uv
+uv venv .venv
+source .venv/bin/activate
+uv pip install -r requirements.txt -r requirements-test.txt
+```
+
+## Usage
+
+Run integration tests.
+```bash
+pytest
+```
+
+Watch service logs.
+```shell
+docker compose logs -f
+```
+
+Note that the setup is configured to keep the containers alive after starting
+them. If you want to actively recycle them, invoke `docker compose down` before
+running `pytest`.
diff --git a/application/metabase/backlog.md b/application/metabase/backlog.md new file mode 100644 index 00000000..a1a50116 --- /dev/null +++ b/application/metabase/backlog.md @@ -0,0 +1,81 @@ +# CrateDB <-> Metabase backlog + + +## metabase/metabase:v0.48.4 +Starts tripping with a hard error. +``` +metabase | 2024-11-23 16:34:33,843 WARN sync.util :: Error in sync step Sync postgres Database 2 'cratedb-testdrive' +metabase | org.postgresql.util.PSQLException: ERROR: line 5:20: no viable alternative at input 'select\n NULL as role,\n t.schemaname as schema,\n t.objectname as table' +``` +```sql +with table_privileges as ( + select + NULL as role, + t.schemaname as schema, + t.objectname as table, + pg_catalog.has_table_privilege(current_user, '"' || t.schemaname || '"' || '.' || '"' || t.objectname || '"', 'UPDATE') as update, + pg_catalog.has_table_privilege(current_user, '"' || t.schemaname || '"' || '.' || '"' || t.objectname || '"', 'SELECT') as select, + pg_catalog.has_table_privilege(current_user, '"' || t.schemaname || '"' || '.' || '"' || t.objectname || '"', 'INSERT') as insert, + pg_catalog.has_table_privilege(current_user, '"' || t.schemaname || '"' || '.' 
|| '"' || t.objectname || '"', 'DELETE') as delete + from ( + select schemaname, tablename as objectname from pg_catalog.pg_tables + union + select schemaname, viewname as objectname from pg_catalog.pg_views + union + select schemaname, matviewname as objectname from pg_catalog.pg_matviews + ) t + where t.schemaname !~ '^pg_' + and t.schemaname <> 'information_schema' + and pg_catalog.has_schema_privilege(current_user, t.schemaname, 'USAGE') +) +select t.* +from table_privileges t; +``` +``` +SQLParseException[line 5:17: no viable alternative at input 'select\nNULL as role,\nt.schemaname as schema,\nt.objectname as table'] +``` + + +## metabase/metabase:v0.45.4.3 + +``` +2024-11-22 23:22:07,139 ERROR driver.util :: Failed to connect to Database +org.postgresql.util.PSQLException: The server does not support SSL. +``` + +``` +2024-11-22 23:22:07,290 WARN metabase.email :: Failed to send email +clojure.lang.ExceptionInfo: SMTP host is not set. {:cause :smtp-host-not-set} +``` + +``` +2024-11-22 23:22:08,189 WARN sync.util :: Error running step 'sync-timezone' for postgres Database 2 'cratedb-testdrive' +java.lang.Exception: Unable to parse date string '2024-11-22 23:22:08.175 ' for database engine 'postgres' +``` + +``` +2024-11-22 23:22:08,724 WARN sync.describe-table :: Don't know how to map column type '_int4' to a Field base_type, falling back to :type/*. +2024-11-22 23:22:08,724 WARN sync.describe-table :: Don't know how to map column type '_int4' to a Field base_type, falling back to :type/*. +2024-11-22 23:22:08,725 WARN sync.describe-table :: Don't know how to map column type 'regclass' to a Field base_type, falling back to :type/*. +2024-11-22 23:22:08,725 WARN sync.describe-table :: Don't know how to map column type '_int4' to a Field base_type, falling back to :type/*. +2024-11-22 23:22:08,726 WARN sync.describe-table :: Don't know how to map column type '_int2' to a Field base_type, falling back to :type/*. +... 
+``` + +``` +2024-11-22 23:22:13,900 WARN sync.util :: Error fingerprinting Table 12 'sys.jobs' +clojure.lang.ExceptionInfo: Error executing query: ERROR: line 2:359: no viable alternative at input 'SELECT "source"."substring531" AS "substring531", "source"."substring532" AS "substring532", "source"."substring533" AS "substring533", "source"."started" AS "started", "source"."substring534" AS "substring534", "source"."substring535" AS "substring535", "source"."substring536" AS "substring536" FROM (SELECT "sys"."jobs"."id" AS "id", ("sys"."jobs"."node"#>' +``` + +``` +2024-11-22 23:22:14,390 WARN sync.util :: Error fingerprinting Table 13 'sys.nodes' +clojure.lang.ExceptionInfo: Error executing query: ERROR: line 2:97: no viable alternative at input 'SELECT "source"."load['probe_timestamp']" AS "load['probe_timestamp']", ("source"."fs['total']"#>' +``` + +``` +2024-11-22 23:22:23,588 ERROR models.field-values :: Error fetching field values +clojure.lang.ExceptionInfo: Error executing query: ERROR: Cannot ORDER BY 'conffeqop': invalid data type 'integer_array'. + +2024-11-22 23:22:23,599 ERROR models.field-values :: Error fetching field values +clojure.lang.ExceptionInfo: Error executing query: ERROR: Cannot ORDER BY 'conkey': invalid data type 'smallint_array'. 
+```
diff --git a/application/metabase/docker-compose.yml b/application/metabase/docker-compose.yml
new file mode 100644
index 00000000..542a6d63
--- /dev/null
+++ b/application/metabase/docker-compose.yml
@@ -0,0 +1,51 @@
+networks:
+  metanet-demo:
+    driver: bridge
+
+services:
+
+  # Metabase
+  # https://www.metabase.com/docs/latest/installation-and-operation/running-metabase-on-docker#example-docker-compose-yaml-file
+  metabase:
+    image: metabase/metabase:v0.48.3
+    container_name: metabase
+    hostname: metabase
+    volumes:
+      - /dev/urandom:/dev/random:ro
+    ports:
+      - "3000:3000"
+    networks:
+      - metanet-demo
+    healthcheck:
+      test: curl --fail -I http://localhost:3000/api/health || exit 1
+      interval: 15s
+      timeout: 5s
+      retries: 5
+
+  # CrateDB
+  # https://github.com/crate/crate
+  cratedb:
+    image: crate/crate:nightly
+    container_name: cratedb
+    hostname: cratedb
+    ports:
+      - "4200:4200"
+      - "5432:5432"
+    networks:
+      - metanet-demo
+    healthcheck:
+      # https://github.com/crate/docker-crate/pull/151/files
+      test: curl --max-time 25 http://localhost:4200 || exit 1
+      interval: 30s
+      timeout: 30s
+
+  # Wait for all defined services to be fully available by probing their health
+  # status, even when using `docker compose up --detach`.
+  # https://marcopeg.com/2019/docker-compose-healthcheck/
+  wait:
+    image: dadarek/wait-for-dependencies
+    depends_on:
+      metabase:
+        condition: service_healthy
+      cratedb:
+        condition: service_healthy
diff --git a/application/metabase/metabase_rig.py b/application/metabase/metabase_rig.py
new file mode 100644
index 00000000..3b849c63
--- /dev/null
+++ b/application/metabase/metabase_rig.py
@@ -0,0 +1,189 @@
+import time
+from functools import cached_property
+
+import requests
+from metabase_api import Metabase_API
+
+
+class MetabaseRig:
+    """
+    Support end-to-end testing of CrateDB and Metabase.
+
+    https://www.metabase.com/docs/latest/api-documentation
+
+    Authenticate your requests with a session token
+    https://www.metabase.com/learn/metabase-basics/administration/administration-and-operation/metabase-api#authenticate-your-requests-with-a-session-token
+    """
+    def __init__(self, url: str):
+        self.username = "foobar@example.org"
+        self.password = "123456metabase"
+        self.mb = None
+
+        self.url = url
+        self.api_url = f"{url.rstrip('/')}/api"
+        self.session = requests.Session()
+        self.session_token = None
+
+    def get_setup_token(self) -> str:
+        """
+        Read the `setup-token` value from the session properties endpoint.
+        """
+        response = self.session.get(f"{self.api_url}/session/properties")
+        response.raise_for_status()
+        return response.json()["setup-token"]
+
+    def setup(self):
+        """
+        Run Metabase setup, create admin user, and store the returned session ID.
+
+        https://www.metabase.com/docs/latest/api/setup#post-apisetup
+        https://discourse.metabase.com/t/rest-api-for-initial-setup-process/3419
+        """
+        response = self.session.post(f"{self.api_url}/setup", json={
+            "prefs": {
+                "allow_tracking": "false",
+                "site_locale": "en",
+                "site_name": "Hotzenplotz",
+            },
+            "user": {
+                "password": self.password,
+                "password_confirm": self.password,
+                "email": self.username,
+            },
+            "token": self.get_setup_token(),
+        })
+        # Surface the HTTP error instead of a `KeyError` on a missing `id`.
+        response.raise_for_status()
+        self.session_token = response.json()["id"]
+
+    def login(self):
+        """
+        Sign in on the shared HTTP session, then create the `metabase_api` client.
+
+        Raises `requests.HTTPError` when the session request is rejected.
+        """
+        response = self.session.post(f"{self.api_url}/session", json={
+            "username": self.username,
+            "password": self.password,
+        })
+        response.raise_for_status()
+        self.mb = Metabase_API(self.url, self.username, self.password)
+
+    def get_databases(self):
+        return self.session.get(f"{self.api_url}/database").json()
+
+    def database(self, name: str) -> "MetabaseDatabase":
+        return MetabaseDatabase(rig=self, name=name)
+
+
+class MetabaseDatabase:
+    """
+    Handle for a single Metabase database, addressed by its name.
+    """
+    def __init__(self, rig: MetabaseRig, name: str):
+        self.rig = rig
+        self.name = name
+        # Number of one-second retries `_wait` performs before giving up.
+        self.timeout = 15
+
+    @cached_property
+    def id(self):
+        # Resolved once per instance. A failed lookup raises and is not cached,
+        # so `exists()` can be called again after the database was created.
+        return self.rig.mb.get_item_id("database", self.name)
+
+    def create(self):
+        """
+        https://www.metabase.com/docs/latest/api/database#post-apidatabase
+        """
+        response = self.rig.session.post(
+            f"{self.rig.api_url}/database",
+            json={
+                "engine": "postgres",
+                "name": self.name,
+                "details": {
+                    "host": "cratedb",
+                    "port": 5432,
+                    "user": "crate",
+                },
+            },
+        )
+        response.raise_for_status()
+
+    def exists(self):
+        try:
+            response = self.rig.session.get(f"{self.rig.api_url}/database/{self.id}")
+            return response.status_code == 200
+        except ValueError as ex:
+            # An unknown database name surfaces as `ValueError` from the `id` lookup.
+            if "There is no DB with the name" not in str(ex):
+                raise
+            return False
+
+    def schema(self, name: str):
+        response = self.rig.session.get(f"{self.rig.api_url}/database/{self.id}/schema/{name}")
+        response.raise_for_status()
+        return response.json()
+
+    def table_names(self, schema_name: str):
+        return [
+            f"{item['schema']}.{item['name']}"
+            for item in self.schema(name=schema_name)
+        ]
+
+    def table_id_by_name(self, name: str):
+        return self.rig.mb.get_item_id("table", name)
+
+    def query(self, table: str):
+        response = self.rig.session.post(
+            f"{self.rig.api_url}/dataset",
+            json={
+                "database": self.id,
+                "query": {
+                    "source-table": self.table_id_by_name(table),
+                },
+                "type": "query",
+                "parameters": [],
+            }
+        )
+        response.raise_for_status()
+        return response.json()
+
+    def wait_database(self):
+        return self._wait(self.exists, f"Database not found: {self.name}")
+
+    def wait_schema(self, name: str):
+        def condition():
+            try:
+                if schema := self.schema(name):
+                    return schema
+            except requests.RequestException:
+                pass
+            return False
+        return self._wait(condition, f"Database schema '{name}' not found in database '{self.name}'")
+
+    def wait_table(self, schema: str, name: str):
+        def condition():
+            # Raises `TimeoutError` on its own when the schema never appears.
+            schema_info = self.wait_schema(schema)
+            return any(
+                item["name"] == name and item.get("initial_sync_status") == "complete"
+                for item in schema_info
+            )
+        return self._wait(condition, f"Table not found: {schema}.{name}")
+
+    def _wait(self, condition, timeout_message):
+        """
+        Poll `condition` once per second until it returns a truthy value.
+
+        Returns that value, or raises `TimeoutError` with `timeout_message`
+        once `self.timeout` retries are used up.
+        """
+        timeout = self.timeout
+        while True:
+            if result := condition():
+                return result
+            if timeout == 0:
+                raise TimeoutError(timeout_message)
+            timeout -= 1
+            time.sleep(1)
diff --git a/application/metabase/pyproject.toml b/application/metabase/pyproject.toml
new file mode 100644
index 00000000..d8811ca2
--- /dev/null
+++ b/application/metabase/pyproject.toml
@@ -0,0 +1,14 @@
+[tool.pytest.ini_options]
+minversion = "2.0"
+addopts = """
+  -rfEXs -p pytester --strict-markers --verbosity=3
+  --capture=no
+  --docker-compose-no-build
+  --use-running-containers
+  """
+log_level = "DEBUG"
+log_cli_level = "DEBUG"
+testpaths = ["*.py"]
+xfail_strict = true
+markers = [
+]
diff --git a/application/metabase/requirements-test.txt b/application/metabase/requirements-test.txt
new file mode 100644
index 00000000..1777869d
--- /dev/null
+++ b/application/metabase/requirements-test.txt
@@ -0,0 +1,2 @@
+pytest<9
+pytest-docker-compose-v2<0.2
diff --git a/application/metabase/requirements.txt b/application/metabase/requirements.txt
new file mode 100644
index 00000000..5579ba96
--- /dev/null
+++ b/application/metabase/requirements.txt
@@ -0,0 +1 @@
+metabase-api<3.5
diff --git a/application/metabase/test.py b/application/metabase/test.py
new file mode 100644
index 00000000..d23f389c
--- /dev/null
+++ b/application/metabase/test.py
@@ -0,0 +1,35 @@
+from metabase_rig import MetabaseRig
+
+pytest_plugins = ["docker_compose"]
+
+
+def test_api_sys_summits(session_scoped_container_getter):
+    """
+    End-to-end test reading data from CrateDB through Metabase.
+    """
+    rig = MetabaseRig("http://localhost:3000/")
+
+    # Login to and optionally provision Metabase.
+    try:
+        rig.login()
+    except Exception:
+        rig.setup()
+        rig.login()
+
+    # Acquire a database handle, optionally creating a database.
+    db = rig.database("cratedb-testdrive")
+    if not db.exists():
+        db.create()
+
+    # Wait for the `sys` schema to become available.
+    db.wait_schema("sys")
+
+    # Validate a table exists in the `sys` schema.
+    assert "sys.summits" in db.table_names(schema_name="sys")
+
+    # Wait for the `sys.summits` table to become available.
+    db.wait_table("sys", "summits")
+
+    # Query the `sys.summits` table.
+    data = db.query("summits")
+    assert data["data"]["rows"][0][5] == "Mont Blanc"