Skip to content

Commit

Permalink
Make it work
Browse files Browse the repository at this point in the history
  • Loading branch information
amotl committed Dec 10, 2023
1 parent a3fa151 commit 736f72c
Show file tree
Hide file tree
Showing 13 changed files with 296 additions and 53 deletions.
12 changes: 12 additions & 0 deletions .github/dependabot.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
version: 2

updates:
- package-ecosystem: "pip"
directory: "/"
schedule:
interval: "weekly"

- package-ecosystem: "github-actions"
directory: "/"
schedule:
interval: "weekly"
82 changes: 82 additions & 0 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
---
name: "Tests: Common"

on:
pull_request: ~
push:
branches: [ main ]

# Allow job to be triggered manually.
workflow_dispatch:

# Run job each night after CrateDB nightly has been published.
schedule:
- cron: '0 3 * * *'

# Cancel in-progress jobs when pushing to the same branch.
concurrency:
cancel-in-progress: true
group: ${{ github.workflow }}-${{ github.ref }}

jobs:

tests:

runs-on: ${{ matrix.os }}
strategy:
fail-fast: false
matrix:
os: ["ubuntu-latest"]
python-version: ["3.8", "3.11"]

env:
OS: ${{ matrix.os }}
PYTHON: ${{ matrix.python-version }}
# Do not tear down Testcontainers
TC_KEEPALIVE: true

# https://docs.github.com/en/actions/using-containerized-services/about-service-containers
services:
cratedb:
image: crate/crate:nightly
ports:
- 4200:4200
- 5432:5432

name: Python ${{ matrix.python-version }} on OS ${{ matrix.os }}
steps:

- name: Acquire sources
uses: actions/checkout@v4

- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
architecture: x64
cache: 'pip'
cache-dependency-path: 'pyproject.toml'

- name: Set up project
run: |
# `setuptools 64.0.0` adds support for editable install hooks (PEP 660).
# https://github.com/pypa/setuptools/blob/main/CHANGES.rst#v6400
pip install "setuptools>=64" --upgrade
# Install package in editable mode.
pip install --use-pep517 --prefer-binary --editable=.[test,develop]
- name: Run linter and software tests
run: |
# poe check
poe test
- name: Upload coverage to Codecov
uses: codecov/codecov-action@v3
with:
files: ./coverage.xml
flags: main
env_vars: OS,PYTHON
name: codecov-umbrella
fail_ci_if_error: false
29 changes: 29 additions & 0 deletions docs/backlog.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Backlog

## PK UX

It looks like a simple SQLAlchemy (SA) model like the one below is sufficient
to provide auto-incrementing primary keys when used with PostgreSQL.

It must happen implicitly somehow, because the `id` column isn't configured
explicitly to be a `SERIAL`, or otherwise to be "auto-increment".
```python
Table(
table_name,
metadata_obj,
Column("id", Integer, primary_key=True),
Column("updated_at", DateTime(), nullable=False),
Column("name", String()),
)
```

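With CrateDB, PostgreSQL's `SERIAL` behavior can be emulated by using a
server-side default, as discussed in the community thread referenced below.

```python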
import sqlalchemy as sa

sa.Column(
"id", sa.BigInteger, primary_key=True,
server_default=sa.text("NOW()::LONG"),
)
```

-- https://community.cratedb.com/t/sqlalchemy-auto-incrementing-integer-based-server-side-primary-key-for-emulating-postgresqls-serial-type/1664
7 changes: 5 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -84,8 +84,8 @@ dynamic = [
"version",
]
dependencies = [
"crate[sqlalchemy]",
"cratedb-toolkit",
"crate[sqlalchemy]>=0.34",
"cratedb-toolkit @ git+https://github.com/crate-workbench/cratedb-toolkit@sa-no-pinning",
'importlib-resources; python_version < "3.9"',
"meltanolabs-tap-postgres==0.0.6",
]
Expand All @@ -103,9 +103,12 @@ release = [
"twine<5",
]
test = [
"faker>=18.5.1,<21.0.0",
"pendulum~=2.1",
"pytest<8",
"pytest-cov<5",
"pytest-mock<4",
"singer-sdk[testing]",
]
[project.urls]
changelog = "https://github.com/crate-workbench/meltano-tap-cratedb/blob/main/CHANGES.md"
Expand Down
5 changes: 4 additions & 1 deletion tap_cratedb/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1,4 @@
"""A Singer tap for CrateDB, built with the Meltano SDK."""
"""A Singer tap for CrateDB, built with the Meltano SDK, based on the PostgreSQL tap."""
from tap_cratedb.patch import patch_sqlalchemy_dialect

patch_sqlalchemy_dialect()
65 changes: 65 additions & 0 deletions tap_cratedb/patch.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
import datetime as dt


def patch_sqlalchemy_dialect():
    """
    Apply all monkeypatches to the CrateDB SQLAlchemy dialect.

    Runs the type compiler patch, the `DateTime` bind processor patch, and
    the `get_pk_constraint` patch, in that order.
    """
    for apply_patch in (patch_types, patch_datetime, patch_get_pk_constraint):
        apply_patch()


def patch_datetime():
    """
    Replace the bind processor of the CrateDB dialect's `DateTime` type.

    The replacement also accepts plain `dt.date` objects, which the test
    suite supplies, and which would otherwise fail on this routine.
    """
    from crate.client.sqlalchemy.dialect import DateTime

    timestamp_format = '%Y-%m-%dT%H:%M:%S.%fZ'

    def bind_processor(self, dialect):
        def process(value):
            # `dt.datetime` is a subclass of `dt.date`, so a single check
            # covers both kinds of values.
            if not isinstance(value, dt.date):
                # Everything else is handed through untouched.
                return value
            return value.strftime(timestamp_format)
        return process

    DateTime.bind_processor = bind_processor


def patch_get_pk_constraint():
    """
    Make `CrateDialect.get_pk_constraint` return `constrained_columns` as a `list`.

    The original method hands back a `set` there, which can not be
    serialized to JSON when the tap catalog is generated:

      tap = TapCrateDB(config=SAMPLE_CONFIG)
      tap_catalog = json.loads(tap.catalog_json_text)
      TypeError: Object of type set is not JSON serializable
    """
    from sqlalchemy.engine import reflection
    from crate.client.sqlalchemy import CrateDialect

    # Keep a reference to the original method, so the wrapper can delegate to it.
    upstream_get_pk_constraint = CrateDialect.get_pk_constraint

    @reflection.cache
    def get_pk_constraint(self, engine, table_name, schema=None, **kw):
        result = upstream_get_pk_constraint(
            self, engine, table_name, schema=schema, **kw
        )
        # Convert in place, so the same dictionary object is returned.
        columns = list(result["constrained_columns"])
        result["constrained_columns"] = columns
        return result

    CrateDialect.get_pk_constraint = get_pk_constraint


def patch_types():
    """
    Teach the CrateDB type compiler about PostgreSQL's `JSON` and `JSONB` types.

    Both types are emulated by rendering them as CrateDB's `OBJECT` type.
    """
    from crate.client.sqlalchemy.compiler import CrateTypeCompiler

    def visit_JSON(self, type_, **kw):
        return "OBJECT"

    def visit_JSONB(self, type_, **kw):
        return "OBJECT"

    # Install each visitor on the compiler class under its own name.
    for visitor in (visit_JSON, visit_JSONB):
        setattr(CrateTypeCompiler, visitor.__name__, visitor)
11 changes: 11 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,14 @@
"""Test Configuration."""
import logging

pytest_plugins = ("singer_sdk.testing.pytest_plugin",)

# Increase logging for components we are working on.
for _logger_name in ("sqlconnector", "tap-cratedb", "tap-postgres"):
    logging.getLogger(_logger_name).setLevel(logging.DEBUG)

# Decrease logging for components not of immediate interest.
for _logger_name in ("faker", "crate.client.http", "urllib3.connectionpool"):
    logging.getLogger(_logger_name).setLevel(logging.INFO)
2 changes: 1 addition & 1 deletion tests/resources/data.json
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@
"id"
],
"forced-replication-method": "",
"schema-name": "public",
"schema-name": "doc",
"selected": true,
"replication-method": "INCREMENTAL",
"replication-key": "updated_at"
Expand Down
2 changes: 1 addition & 1 deletion tests/resources/data_selected_columns_only.json
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@
"id"
],
"forced-replication-method": "",
"schema-name": "public",
"schema-name": "doc",
"selected": true,
"replication-method": "INCREMENTAL",
"replication-key": "updated_at"
Expand Down
Loading

0 comments on commit 736f72c

Please sign in to comment.