Skip to content
This repository has been archived by the owner on Nov 6, 2023. It is now read-only.

Commit

Permalink
feat(sqlite): Sqlite integration(#194)
Browse files Browse the repository at this point in the history
  • Loading branch information
andrwqa authored Jun 9, 2023
1 parent f292ac7 commit 07bfd68
Show file tree
Hide file tree
Showing 20 changed files with 310 additions and 8 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ To learn more about collector types and ODD Platform's architecture, [read the d
| Airbyte | [config](config_examples/airbyte.yaml) |
| SingleStore | [config](config_examples/singlestore.yaml) |
| cockroachdb | [config](config_examples/cockroachdb.yaml) |
| sqlite | [config](config_examples/sqlite.yaml) |

## Class diagram of adapter class hierarchy
This may help you to understand which fields you need for each adapter in `collector_config.yaml` and also may be helpful for a new adapter developer.
Expand Down
7 changes: 7 additions & 0 deletions config_examples/sqlite.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
platform_host_url: http://localhost:8080
default_pulling_interval: 10
token: ""
plugins:
- type: sqlite
name: sqlite_adapter
data_source: /path/to/local/file.db # Must be a database file on disk. An in-memory database cannot be used: each session gets its own separate in-memory database, so its contents would not be visible to the collector.
Empty file.
37 changes: 37 additions & 0 deletions odd_collector/adapters/sqlite/adapter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
from funcy import concat, lpluck_attr
from odd_collector_sdk.domain.adapter import BaseAdapter
from odd_models.models import DataEntityList
from oddrn_generator import SQLiteGenerator

from odd_collector.domain.plugin import SQLitePlugin
from .mappers.database import map_database
from .mappers.table import map_table
from .mappers.view import map_view
from .repository.sqlalchemy_repository import SqlAlchemyRepository


class Adapter(BaseAdapter):
    """Collect the tables, views and database entity of one SQLite file."""

    config: SQLitePlugin
    generator: SQLiteGenerator

    def __init__(self, config: SQLitePlugin) -> None:
        """Hand the plugin configuration over to ``BaseAdapter``."""
        super().__init__(config)

    def create_generator(self) -> SQLiteGenerator:
        """Create the oddrn generator for the configured database file."""
        database_path = self.config.data_source
        return SQLiteGenerator(path=database_path)

    def get_data_entity_list(self) -> DataEntityList:
        """Map every table and view to a data entity and group them.

        The returned list holds the table entities first, then the view
        entities, and finally one database entity that references the oddrn
        of each of them.
        """
        repository = SqlAlchemyRepository(self.config)

        # Tables are mapped before views; both share the same generator.
        table_entities = [
            map_table(self.generator, item) for item in repository.get_tables()
        ]
        view_entities = [
            map_view(self.generator, item) for item in repository.get_views()
        ]

        dataset_entities = table_entities + view_entities
        member_oddrns = lpluck_attr("oddrn", dataset_entities)
        database_entity = map_database(
            self.generator, self.config.data_source, member_oddrns
        )

        source_oddrn = self.get_data_source_oddrn()
        return DataEntityList(
            data_source_oddrn=source_oddrn,
            items=dataset_entities + [database_entity],
        )
3 changes: 3 additions & 0 deletions odd_collector/adapters/sqlite/domain/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
from .column import Column
from .table import Table
from .view import View
20 changes: 20 additions & 0 deletions odd_collector/adapters/sqlite/domain/column.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
from dataclasses import dataclass
from typing import Any, Optional

import sqlalchemy.sql.sqltypes as sqltype
from funcy import omit


@dataclass
class Column:
    """Description of one column of a SQLite table or view."""

    name: str
    # SQLAlchemy type *instance* (the mappers test it with isinstance), so the
    # annotation is the TypeEngine base class rather than the sqltypes module.
    type: sqltype.TypeEngine
    primary_key: Optional[bool]
    nullable: Optional[bool]
    default: Optional[Any]
    autoincrement: Optional[Any]
    logical_type: Optional[str]

    @property
    def odd_metadata(self) -> dict:
        """Return the instance fields minus ``name``, ``type`` and ``logical_type``.

        What remains is ``primary_key``, ``nullable``, ``default`` and
        ``autoincrement``.
        """
        return omit(self.__dict__, {"name", "type", "logical_type"})
16 changes: 16 additions & 0 deletions odd_collector/adapters/sqlite/domain/table.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
from dataclasses import dataclass
from typing import List

from funcy import omit

from .column import Column


@dataclass
class Table:
    """A SQLite table: its name together with its columns."""

    name: str
    columns: List[Column]

    @property
    def odd_metadata(self) -> dict:
        """Return the instance fields with ``columns`` left out."""
        excluded = {"columns"}
        return omit(vars(self), excluded)
17 changes: 17 additions & 0 deletions odd_collector/adapters/sqlite/domain/view.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
from dataclasses import dataclass
from typing import List

from funcy import omit

from .column import Column


@dataclass
class View:
    """A SQLite view: its name, its columns and the SQL that defines it."""

    name: str
    columns: List[Column]
    view_definition: str

    @property
    def odd_metadata(self) -> dict:
        """Return the instance fields without ``view_definition`` and ``columns``."""
        excluded = {"view_definition", "columns"}
        return omit(vars(self), excluded)
Empty file.
29 changes: 29 additions & 0 deletions odd_collector/adapters/sqlite/mappers/column.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
from odd_collector_sdk.utils.metadata import extract_metadata, DefinitionType
from odd_models.models import DataSetField, DataSetFieldType
from oddrn_generator import SQLiteGenerator

from .column_type import map_type
from ..domain import Column


def map_column(
    generator: SQLiteGenerator, column_path: str, column: Column
) -> DataSetField:
    """
    Build the DataSetField for a single column.

    :param generator - oddrn generator; its ``column_path`` entry is set to the column name
    :param column_path - generator path key, 'tables_columns' | 'views_columns'
    :param column - Column model
    """
    path_values = {column_path: column.name}
    generator.set_oddrn_paths(**path_values)

    field_oddrn = generator.get_oddrn_by_path(column_path)
    field_type = DataSetFieldType(
        type=map_type(column.type),
        is_nullable=column.nullable,
        logical_type=str(column.logical_type),
    )
    field_metadata = extract_metadata("sqlite", column, DefinitionType.DATASET_FIELD)

    return DataSetField(
        name=column.name,
        oddrn=field_oddrn,
        type=field_type,
        is_primary_key=column.primary_key,
        metadata=[field_metadata],
    )
23 changes: 23 additions & 0 deletions odd_collector/adapters/sqlite/mappers/column_type.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
from odd_models.models import Type
import sqlalchemy.sql.sqltypes as sqltype


def map_type(column_type: sqltype.TypeEngine) -> Type:
    """Translate a SQLAlchemy column type instance into an ODD ``Type``.

    The checks run top to bottom and the first match wins; anything that
    matches none of them is reported as ``Type.TYPE_UNKNOWN``.

    :param column_type - SQLAlchemy type instance of the column
    """
    if isinstance(column_type, sqltype.Numeric):
        return Type.TYPE_NUMBER
    if isinstance(column_type, sqltype.Integer):
        return Type.TYPE_INTEGER
    if isinstance(column_type, (sqltype.DateTime, sqltype.Date)):
        return Type.TYPE_DATETIME
    if isinstance(column_type, sqltype.Time):
        return Type.TYPE_TIME
    # Keep Text ahead of String. NOTE(review): Text is a String subclass in
    # SQLAlchemy (verify when upgrading), so the reverse order would never
    # reach this branch.
    if isinstance(column_type, sqltype.Text):
        return Type.TYPE_CHAR
    if isinstance(column_type, sqltype.String):
        return Type.TYPE_STRING
    if isinstance(column_type, sqltype._Binary):
        return Type.TYPE_BINARY
    if isinstance(column_type, sqltype.Boolean):
        return Type.TYPE_BOOLEAN
    return Type.TYPE_UNKNOWN
21 changes: 21 additions & 0 deletions odd_collector/adapters/sqlite/mappers/database.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
from typing import List

from odd_models.models import DataEntity, DataEntityGroup, DataEntityType
from oddrn_generator import SQLiteGenerator
from pydantic.types import FilePath


def map_database(
    generator: SQLiteGenerator, data_source: FilePath, entities: List[str]
) -> DataEntity:
    """
    Build the database-level DataEntity that groups the given entities.

    :param generator - SQLiteGenerator
    :param data_source - data source path; its string form becomes the entity name
    :param entities - list of Table | View oddrn
    """
    database_oddrn = generator.get_oddrn_by_path("path")
    database_name = str(data_source)
    members = DataEntityGroup(entities_list=entities)

    return DataEntity(
        oddrn=database_oddrn,
        name=database_name,
        type=DataEntityType.DATABASE_SERVICE,
        data_entity_group=members,
    )
23 changes: 23 additions & 0 deletions odd_collector/adapters/sqlite/mappers/table.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
from odd_collector_sdk.utils.metadata import extract_metadata, DefinitionType
from odd_models.models import DataEntity, DataEntityType, DataSet
from oddrn_generator import SQLiteGenerator

from .column import map_column
from ..domain import Table


def map_table(generator: SQLiteGenerator, table: Table) -> DataEntity:
    """
    Build the TABLE DataEntity, including one dataset field per column.

    :param generator - SQLiteGenerator; its 'tables' path is set to the table name
    :param table - Table model
    """
    generator.set_oddrn_paths(tables=table.name)

    # The table oddrn is read before the columns are mapped, because mapping a
    # column sets further paths on the same generator.
    table_oddrn = generator.get_oddrn_by_path("tables")
    fields = [map_column(generator, "tables_columns", col) for col in table.columns]
    dataset = DataSet(field_list=fields)
    table_metadata = extract_metadata("sqlite", table, DefinitionType.DATASET)

    return DataEntity(
        oddrn=table_oddrn,
        name=table.name,
        type=DataEntityType.TABLE,
        dataset=dataset,
        metadata=[table_metadata],
    )
27 changes: 27 additions & 0 deletions odd_collector/adapters/sqlite/mappers/view.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
from odd_collector_sdk.utils.metadata import extract_metadata, DefinitionType
from odd_models.models import DataEntity, DataEntityType, DataSet
from oddrn_generator import SQLiteGenerator

from odd_collector.domain.utils import extract_transformer_data
from .column import map_column
from ..domain import View


def map_view(generator: SQLiteGenerator, view: View) -> DataEntity:
    """
    Build the VIEW DataEntity, including its fields and transformer data.

    :param generator - SQLiteGenerator; its 'views' path is set to the view name
    :param view - View model
    """
    generator.set_oddrn_paths(views=view.name)

    # Same order as the entity fields: oddrn, columns, transformer, metadata.
    # All of these steps use the same generator.
    view_oddrn = generator.get_oddrn_by_path("views")
    fields = [map_column(generator, "views_columns", col) for col in view.columns]
    dataset = DataSet(field_list=fields)
    transformer = extract_transformer_data(view.view_definition, generator, "tables")
    view_metadata = extract_metadata("sqlite", view, DefinitionType.DATASET)

    return DataEntity(
        oddrn=view_oddrn,
        name=view.name,
        type=DataEntityType.VIEW,
        dataset=dataset,
        data_transformer=transformer,
        metadata=[view_metadata],
    )
Empty file.
15 changes: 15 additions & 0 deletions odd_collector/adapters/sqlite/repository/base_repository.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
from typing import Any, Protocol


class Repository(Protocol):
    """Structural interface of a metadata repository.

    Every method is declared without an implementation and with an open
    (``Any``) return type; concrete repositories decide what they return.
    """

    def get_schemas(self) -> Any:
        """Schema lookup hook; no behaviour is defined here."""

    def get_databases(self) -> Any:
        """Database lookup hook; no behaviour is defined here."""

    def get_tables(self) -> Any:
        """Table lookup hook; no behaviour is defined here."""

    def get_columns(self) -> Any:
        """Column lookup hook; no behaviour is defined here."""
57 changes: 57 additions & 0 deletions odd_collector/adapters/sqlite/repository/sqlalchemy_repository.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
from typing import Any, Dict, Iterable, List

import sqlalchemy as db
from funcy import lmap
from sqlalchemy.util import FacadeDict

from odd_collector.domain.plugin import SQLitePlugin
from .base_repository import Repository
from ..domain import Column, Table, View


def create_column(data: Dict[str, Any]) -> Column:
    """Convert one column description dict into a ``Column``.

    ``type`` is the only required key (a missing key raises ``KeyError``) and
    is stored as both ``type`` and ``logical_type``. ``primary_key`` is
    coerced to ``bool``; every other missing key becomes ``None``.
    """
    column_type = data["type"]
    is_primary_key = bool(data.get("primary_key"))

    return Column(
        name=data.get("name"),
        type=column_type,
        primary_key=is_primary_key,
        nullable=data.get("nullable"),
        default=data.get("default"),
        autoincrement=data.get("autoincrement"),
        logical_type=column_type,
    )


class SqlAlchemyRepository(Repository):
    """Read tables, views and their columns from a SQLite file via SQLAlchemy."""

    def __init__(self, config: SQLitePlugin) -> None:
        """Create the engine, reflect the schema and set up the inspector.

        :param config - plugin configuration; ``data_source`` is the database path
        """
        self._config = config
        self._eng = self._create_engine()
        # The engine is passed to reflect() instead of MetaData(bind=...):
        # bound metadata is deprecated in SQLAlchemy 1.4 and removed in 2.0,
        # while reflect(bind=...) is accepted by both.
        self._meta = db.MetaData()
        self._meta.reflect(bind=self._eng, views=True)
        self._tables: db.util.FacadeDict[str, db.Table] = self._meta.tables
        self._inspector = db.inspect(self._eng)

    def get_tables(self) -> Iterable[Table]:
        """Lazily yield one ``Table`` per table name reported by the inspector."""
        for table_name in self._inspector.get_table_names():
            yield Table(
                name=table_name,
                columns=self._get_columns(table_name),
            )

    def get_views(self) -> Iterable[View]:
        """Lazily yield one ``View``, with its SQL definition, per view name."""
        for view_name in self._inspector.get_view_names():
            yield View(
                name=view_name,
                columns=self._get_columns(view_name),
                view_definition=self._inspector.get_view_definition(view_name),
            )

    def _get_columns(self, table_name: str) -> List[Column]:
        """Return the columns of the given table or view as ``Column`` objects."""
        return lmap(create_column, self._inspector.get_columns(table_name))

    def _create_engine(self) -> db.engine.Engine:
        """Create the engine for ``sqlite:///<data_source>``."""
        connection_str = f"sqlite:///{self._config.data_source}"
        return db.create_engine(connection_str)
8 changes: 7 additions & 1 deletion odd_collector/domain/plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from odd_collector_sdk.domain.plugin import Plugin as BasePlugin
from odd_collector_sdk.types import PluginFactory
from pydantic import BaseModel, SecretStr, validator
from pydantic import BaseModel, SecretStr, validator, FilePath

from odd_collector.domain.predefined_data_source import PredefinedDatasourceParams

Expand Down Expand Up @@ -296,6 +296,11 @@ class CouchbasePlugin(BasePlugin):
num_sample_values: Optional[int] = 10


# Plugin configuration for the SQLite adapter (plugins with `type: sqlite`).
class SQLitePlugin(BasePlugin):
    # Path to the SQLite database file. The adapter puts it into the
    # `sqlite:///...` connection URL and passes it to the oddrn generator.
    # NOTE(review): pydantic's FilePath presumably rejects paths that do not
    # point to an existing file — confirm against the pydantic docs.
    data_source: FilePath
    # Discriminator value for this plugin.
    type: Literal["sqlite"]


class DatabricksPlugin(BasePlugin):
type: Literal["databricks"]
workspace: str
Expand Down Expand Up @@ -339,5 +344,6 @@ class DatabricksPlugin(BasePlugin):
"fivetran": FivetranPlugin,
"cockroachdb": CockroachDBPlugin,
"couchbase": CouchbasePlugin,
"sqlite": SQLitePlugin,
"databricks": DatabricksPlugin,
}
12 changes: 6 additions & 6 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ more-itertools = "8.13.0"
mysql-connector-python = "^8.0.32"
neo4j = "4.3.7"
oracledb = "1.2.1"
oddrn-generator = "^0.1.76"
oddrn-generator = "^0.1.82"
psycopg2-binary = "2.9.3"
python = "^3.9"
pyhumps = "3.0.2"
Expand Down

0 comments on commit 07bfd68

Please sign in to comment.