Skip to content

Commit

Permalink
feat: add YDB as a new database engine (apache#31141)
Browse files Browse the repository at this point in the history
  • Loading branch information
vgvoleg authored Dec 5, 2024
1 parent 638f82b commit cf5c770
Show file tree
Hide file tree
Showing 7 changed files with 287 additions and 0 deletions.
1 change: 1 addition & 0 deletions .rat-excludes
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ google-sheets.svg
ibm-db2.svg
postgresql.svg
snowflake.svg
ydb.svg

# docs-related
erd.puml
Expand Down
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,7 @@ Here are some of the major database solutions that are supported:
<img src="https://superset.apache.org/img/databases/oceanbase.svg" alt="oceanbase" border="0" width="220" />
<img src="https://superset.apache.org/img/databases/sap-hana.png" alt="sap-hana" border="0" width="220" />
<img src="https://superset.apache.org/img/databases/denodo.png" alt="denodo" border="0" width="200" />
<img src="https://superset.apache.org/img/databases/ydb.svg" alt="ydb" border="0" width="200" />
</p>

**A more comprehensive list of supported databases** along with the configuration instructions can be found [here](https://superset.apache.org/docs/configuration/databases).
Expand Down
73 changes: 73 additions & 0 deletions docs/docs/configuration/databases.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ are compatible with Superset.
| [TimescaleDB](/docs/configuration/databases#timescaledb) | `pip install psycopg2` | `postgresql://<UserName>:<DBPassword>@<Database Host>:<Port>/<Database Name>` |
| [Trino](/docs/configuration/databases#trino) | `pip install trino` | `trino://{username}:{password}@{hostname}:{port}/{catalog}` |
| [Vertica](/docs/configuration/databases#vertica) | `pip install sqlalchemy-vertica-python` | `vertica+vertica_python://<UserName>:<DBPassword>@<Database Host>/<Database Name>` |
| [YDB](/docs/configuration/databases#ydb) | `pip install ydb-sqlalchemy` | `ydb://{host}:{port}/{database_name}` |
| [YugabyteDB](/docs/configuration/databases#yugabytedb) | `pip install psycopg2` | `postgresql://<UserName>:<DBPassword>@<Database Host>/<Database Name>` |
---

Expand Down Expand Up @@ -1537,6 +1538,78 @@ Other parameters:
- Load Balancer - Backup Host



#### YDB

The recommended connector library for [YDB](https://ydb.tech/) is
[ydb-sqlalchemy](https://pypi.org/project/ydb-sqlalchemy/).

##### Connection String

The connection string for YDB looks like this:

```
ydb://{host}:{port}/{database_name}
```

##### Protocol
You can specify `protocol` in the `Secure Extra` field at `Advanced / Security`:

```
{
"protocol": "grpcs"
}
```

Default is `grpc`.


##### Authentication Methods
###### Static Credentials
To use `Static Credentials` you should provide `username`/`password` in the `Secure Extra` field at `Advanced / Security`:

```
{
"credentials": {
"username": "...",
"password": "..."
}
}
```


###### Access Token Credentials
To use `Access Token Credentials` you should provide `token` in the `Secure Extra` field at `Advanced / Security`:

```
{
"credentials": {
"token": "..."
}
}
```


###### Service Account Credentials
To use Service Account Credentials, you should provide `service_account_json` in the `Secure Extra` field at `Advanced / Security`:

```
{
"credentials": {
"service_account_json": {
"id": "...",
"service_account_id": "...",
"created_at": "...",
"key_algorithm": "...",
"public_key": "...",
"private_key": "..."
}
}
}
```



#### YugabyteDB

[YugabyteDB](https://www.yugabyte.com/) is a distributed SQL database built on top of PostgreSQL.
Expand Down
20 changes: 20 additions & 0 deletions docs/static/img/databases/ydb.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,7 @@ netezza = ["nzalchemy>=11.0.2"]
starrocks = ["starrocks>=1.0.0"]
doris = ["pydoris>=1.0.0, <2.0.0"]
oceanbase = ["oceanbase_py>=0.0.1"]
ydb = ["ydb-sqlalchemy>=0.1.2"]
development = [
"docker",
"flask-testing",
Expand Down
108 changes: 108 additions & 0 deletions superset/db_engine_specs/ydb.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from __future__ import annotations

import logging
from datetime import datetime
from typing import Any, TYPE_CHECKING

from sqlalchemy import types

from superset.constants import TimeGrain
from superset.db_engine_specs.base import BaseEngineSpec
from superset.utils import json

if TYPE_CHECKING:
from superset.models.core import Database


logger = logging.getLogger(__name__)


class YDBEngineSpec(BaseEngineSpec):
    """Engine spec describing how Superset talks to a YDB database."""

    # Dialect name plus the alternative names this spec also answers to.
    engine = "yql"
    engine_aliases = {"ydb", "yql+ydb"}
    engine_name = "YDB"

    default_driver = "ydb"

    sqlalchemy_uri_placeholder = "ydb://{host}:{port}/{database_name}"

    # JSON paths inside the encrypted extra that hold secrets.
    # pylint: disable=invalid-name
    encrypted_extra_sensitive_fields = {"$.connect_args.credentials", "$.credentials"}

    disable_ssh_tunneling = False

    supports_file_upload = False

    allows_alias_in_orderby = True

    # YQL expression templates per time grain; ``{col}`` is the column placeholder.
    _time_grain_expressions = {
        None: "{col}",
        TimeGrain.SECOND: "DateTime::MakeDatetime(DateTime::StartOf({col}, Interval('PT1S')))",
        TimeGrain.THIRTY_SECONDS: "DateTime::MakeDatetime(DateTime::StartOf({col}, Interval('PT30S')))",
        TimeGrain.MINUTE: "DateTime::MakeDatetime(DateTime::StartOf({col}, Interval('PT1M')))",
        TimeGrain.FIVE_MINUTES: "DateTime::MakeDatetime(DateTime::StartOf({col}, Interval('PT5M')))",
        TimeGrain.TEN_MINUTES: "DateTime::MakeDatetime(DateTime::StartOf({col}, Interval('PT10M')))",
        TimeGrain.FIFTEEN_MINUTES: "DateTime::MakeDatetime(DateTime::StartOf({col}, Interval('PT15M')))",
        TimeGrain.THIRTY_MINUTES: "DateTime::MakeDatetime(DateTime::StartOf({col}, Interval('PT30M')))",
        TimeGrain.HOUR: "DateTime::MakeDatetime(DateTime::StartOf({col}, Interval('PT1H')))",
        TimeGrain.DAY: "DateTime::MakeDatetime(DateTime::StartOf({col}, Interval('P1D')))",
        TimeGrain.WEEK: "DateTime::MakeDatetime(DateTime::StartOfWeek({col}))",
        TimeGrain.MONTH: "DateTime::MakeDatetime(DateTime::StartOfMonth({col}))",
        TimeGrain.QUARTER: "DateTime::MakeDatetime(DateTime::StartOfQuarter({col}))",
        TimeGrain.YEAR: "DateTime::MakeDatetime(DateTime::StartOfYear({col}))",
    }

    @classmethod
    def epoch_to_dttm(cls) -> str:
        """Return the YQL expression template applied to epoch columns.

        The template keeps ``{col}`` as a placeholder for the column.
        """
        return "DateTime::MakeDatetime({col})"

    @classmethod
    def convert_dttm(
        cls, target_type: str, dttm: datetime, db_extra: dict[str, Any] | None = None
    ) -> str | None:
        """Render ``dttm`` as a YQL literal expression for ``target_type``.

        :param target_type: Column type name, resolved through
            ``get_sqla_column_type``.
        :param dttm: The value to render.
        :param db_extra: Accepted for interface compatibility; not used here.
        :returns: A ``DateTime::MakeDate`` expression for date columns, a
            ``DateTime::MakeDatetime`` expression (second precision) for
            datetime columns, or ``None`` for any other type.
        """
        column_type = cls.get_sqla_column_type(target_type)

        if isinstance(column_type, types.Date):
            day = dttm.date().isoformat()
            return f"DateTime::MakeDate(DateTime::ParseIso8601('{day}'))"
        if isinstance(column_type, types.DateTime):
            moment = dttm.isoformat(sep="T", timespec="seconds")
            return f"DateTime::MakeDatetime(DateTime::ParseIso8601('{moment}'))"
        return None

    @staticmethod
    def update_params_from_encrypted_extra(
        database: Database,
        params: dict[str, Any],
    ) -> None:
        """Copy connection settings from the encrypted extra into ``params``.

        The ``protocol`` and ``credentials`` keys, when present in the decoded
        encrypted extra, are written to ``params["connect_args"]`` (created if
        missing). Nothing happens when the database has no encrypted extra.

        :param database: Object whose ``encrypted_extra`` holds a JSON string.
        :param params: Engine parameters, mutated in place.
        :raises json.JSONDecodeError: If the encrypted extra is not valid
            JSON; the error is logged before being re-raised.
        """
        raw_extra = database.encrypted_extra
        if not raw_extra:
            return

        try:
            secure_extra = json.loads(raw_extra)
        except json.JSONDecodeError as ex:
            logger.error(ex, exc_info=True)
            raise

        connect_args = params.setdefault("connect_args", {})
        for key in ("protocol", "credentials"):
            if key in secure_extra:
                connect_args[key] = secure_extra[key]
83 changes: 83 additions & 0 deletions tests/unit_tests/db_engine_specs/test_ydb.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# pylint: disable=unused-argument, import-outside-toplevel, protected-access
from __future__ import annotations

from datetime import datetime
from typing import Any, Optional
from unittest.mock import Mock

import pytest

from superset.utils import json
from tests.unit_tests.db_engine_specs.utils import assert_convert_dttm
from tests.unit_tests.fixtures.common import dttm # noqa: F401


def test_epoch_to_dttm() -> None:
    """Check the expression template returned for epoch columns."""
    from superset.db_engine_specs.ydb import YDBEngineSpec

    template = YDBEngineSpec.epoch_to_dttm()
    assert template == "DateTime::MakeDatetime({col})"


@pytest.mark.parametrize(
    "target_type,expected_result",
    [
        ("Date", "DateTime::MakeDate(DateTime::ParseIso8601('2019-01-02'))"),
        (
            "DateTime",
            "DateTime::MakeDatetime(DateTime::ParseIso8601('2019-01-02T03:04:05'))",
        ),
        ("UnknownType", None),
    ],
)
def test_convert_dttm(
    target_type: str,
    expected_result: Optional[str],
    dttm: datetime,  # noqa: F811
) -> None:
    """Check the literal produced for each column type, or ``None`` if unknown."""
    from superset.db_engine_specs.ydb import YDBEngineSpec

    assert_convert_dttm(YDBEngineSpec, target_type, expected_result, dttm)


def test_specify_protocol() -> None:
    """A ``protocol`` entry in the encrypted extra lands in ``connect_args``."""
    from superset.db_engine_specs.ydb import YDBEngineSpec

    database = Mock()
    database.encrypted_extra = json.dumps({"protocol": "grpcs"})
    params: dict[str, Any] = {}

    YDBEngineSpec.update_params_from_encrypted_extra(database, params)

    protocol = params.get("connect_args", {}).get("protocol")
    assert protocol == "grpcs"


def test_specify_credentials() -> None:
    """A ``credentials`` entry in the encrypted extra lands in ``connect_args``."""
    from superset.db_engine_specs.ydb import YDBEngineSpec

    auth_params = {"username": "username", "password": "password"}
    database = Mock()
    database.encrypted_extra = json.dumps({"credentials": auth_params})
    params: dict[str, Any] = {}

    YDBEngineSpec.update_params_from_encrypted_extra(database, params)

    credentials = params.get("connect_args", {}).get("credentials")
    assert credentials == auth_params

0 comments on commit cf5c770

Please sign in to comment.