Skip to content

Commit

Permalink
test(array): move array type inference tests to backend-specific locations
Browse files Browse the repository at this point in the history
  • Loading branch information
cpcloud committed Dec 7, 2023
1 parent 7d5851d commit d8d622c
Show file tree
Hide file tree
Showing 4 changed files with 53 additions and 140 deletions.
16 changes: 16 additions & 0 deletions ibis/backends/clickhouse/tests/test_datatypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import pytest
from pytest import param

import ibis
import ibis.expr.datatypes as dt
import ibis.tests.strategies as its
from ibis.backends.clickhouse.datatypes import ClickhouseType
Expand Down Expand Up @@ -35,6 +36,21 @@ def test_columns_types_with_additional_argument(con):
assert df.datetime_ns_col.dtype.name == "datetime64[ns, UTC]"


def test_array_discovery_clickhouse(con):
    """Verify array column type inference for ClickHouse.

    NOTE(review): relies on an ``array_types`` table existing in the
    connected database — confirm the fixture provides it.
    """
    table = con.tables.array_types
    # ClickHouse reports array element types as non-nullable, including
    # the nested element of the multi-dimensional column.
    expected_schema = ibis.schema(
        {
            "x": dt.Array(dt.int64, nullable=False),
            "y": dt.Array(dt.string, nullable=False),
            "z": dt.Array(dt.float64, nullable=False),
            "grouper": dt.string,
            "scalar_column": dt.float64,
            "multi_dim": dt.Array(dt.Array(dt.int64, nullable=False), nullable=False),
        }
    )
    assert table.schema() == expected_schema


@pytest.mark.parametrize(
("ch_type", "ibis_type"),
[
Expand Down
15 changes: 15 additions & 0 deletions ibis/backends/postgres/tests/test_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -1349,3 +1349,18 @@ def test_string_to_binary_round_trip(con):
cur = c.exec_driver_sql(sql_string)
expected = pd.Series([row[0][0] for row in cur], name=name)
tm.assert_series_equal(result, expected)


def test_array_discovery(con):
    """Verify array column type inference against the ``array_types`` table."""
    table = con.tables.array_types
    # NOTE(review): ``multi_dim`` is asserted as a flat array<int64> —
    # presumably this backend does not preserve nesting; confirm.
    expected_schema = ibis.schema(
        {
            "x": dt.Array(dt.int64),
            "y": dt.Array(dt.string),
            "z": dt.Array(dt.float64),
            "grouper": dt.string,
            "scalar_column": dt.float64,
            "multi_dim": dt.Array(dt.int64),
        }
    )
    assert table.schema() == expected_schema
15 changes: 15 additions & 0 deletions ibis/backends/snowflake/tests/test_datatypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,3 +156,18 @@ def test_extract_timestamp_from_table_sqlalchemy(con, snowflake_type, ibis_type)

t = con.table(name)
assert t.schema() == expected_schema


def test_array_discovery(con):
    """Verify array column type inference for Snowflake.

    All array element types are reported as ``json`` here; only the
    scalar columns keep concrete types.
    """
    table = con.tables.ARRAY_TYPES
    expected_schema = ibis.schema(
        {
            "x": dt.Array(dt.json),
            "y": dt.Array(dt.json),
            "z": dt.Array(dt.json),
            "grouper": dt.string,
            "scalar_column": dt.float64,
            "multi_dim": dt.Array(dt.json),
        }
    )
    assert table.schema() == expected_schema
147 changes: 7 additions & 140 deletions ibis/backends/tests/test_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,17 +34,9 @@
except ImportError:
PySparkAnalysisException = None


try:
from polars.exceptions import PolarsInvalidOperationError
except ImportError:
PolarsInvalidOperationError = None

pytestmark = [
pytest.mark.never(
["sqlite", "mysql", "mssql"],
reason="No array support",
raises=Exception,
["sqlite", "mysql", "mssql"], reason="No array support", raises=Exception
),
pytest.mark.notyet(["impala"], reason="No array support", raises=Exception),
pytest.mark.notimpl(["druid", "oracle"], raises=Exception),
Expand Down Expand Up @@ -194,111 +186,26 @@ def test_array_index(con, idx):
pytest.mark.never(
["sqlite"], reason="array types are unsupported", raises=NotImplementedError
),
# someone just needs to implement these
# someone needs to implement these
pytest.mark.notimpl(["flink"], raises=Exception),
)


@builtin_array
@pytest.mark.never(
    [
        "clickhouse",
        "duckdb",
        "pandas",
        "pyspark",
        "polars",
        "trino",
        "datafusion",
    ],
    reason="backend does not flatten array types",
    raises=AssertionError,
)
# "snowflake" removed from the list above: it already has its own
# dedicated marker below with the accurate reason, so listing it twice
# was redundant.
@pytest.mark.never(
    ["snowflake"],
    reason="snowflake has an extremely specialized way of implementing arrays",
    raises=AssertionError,
)
@pytest.mark.never(
    ["bigquery"], reason="doesn't support arrays of arrays", raises=AssertionError
)
@pytest.mark.notimpl(["dask"], raises=AssertionError)
def test_array_discovery_postgres(backend):
    """Verify postgres-style array discovery: the multi-dimensional
    column is reported flattened as a plain ``array<int64>``."""
    t = backend.array_types
    expected = ibis.schema(
        dict(
            x=dt.Array(dt.int64),
            y=dt.Array(dt.string),
            z=dt.Array(dt.float64),
            grouper=dt.string,
            scalar_column=dt.float64,
            multi_dim=dt.Array(dt.int64),
        )
    )
    assert t.schema() == expected


@builtin_array
@pytest.mark.never(
    ["snowflake"],
    reason="snowflake has an extremely specialized way of implementing arrays",
    raises=AssertionError,
)
# "snowflake" removed from the list below: the dedicated marker above
# already covers it with the accurate reason, so listing it twice was
# redundant.
@pytest.mark.never(
    [
        "duckdb",
        "pandas",
        "postgres",
        "pyspark",
        "polars",
        "trino",
        "datafusion",
    ],
    reason="backend supports nullable nested types",
    raises=AssertionError,
)
@pytest.mark.never(
    ["bigquery"],
    reason="doesn't support arrays of arrays",
    raises=AssertionError,
)
@pytest.mark.never(["dask"], raises=AssertionError, reason="allows nullable types")
def test_array_discovery_clickhouse(backend):
    """Verify clickhouse-style array discovery: element types are
    non-nullable, and nesting of the multi-dimensional column is kept."""
    t = backend.array_types
    expected = ibis.schema(
        dict(
            x=dt.Array(dt.int64, nullable=False),
            y=dt.Array(dt.string, nullable=False),
            z=dt.Array(dt.float64, nullable=False),
            grouper=dt.string,
            scalar_column=dt.float64,
            multi_dim=dt.Array(
                dt.Array(dt.int64, nullable=False),
                nullable=False,
            ),
        )
    )
    assert t.schema() == expected


@builtin_array
@pytest.mark.notyet(
["clickhouse", "postgres"],
reason="backend does not support nullable nested types",
raises=AssertionError,
)
@pytest.mark.never(
["bigquery"],
reason="doesn't support arrays of arrays",
raises=AssertionError,
["bigquery"], reason="doesn't support arrays of arrays", raises=AssertionError
)
@pytest.mark.never(
["snowflake"],
reason="snowflake has an extremely specialized way of implementing arrays",
raises=AssertionError,
)
def test_array_discovery_desired(backend):
def test_array_discovery(backend):
t = backend.array_types
expected = ibis.schema(
dict(
Expand All @@ -313,40 +220,6 @@ def test_array_discovery_desired(backend):
assert t.schema() == expected


@builtin_array
@pytest.mark.never(
    [
        "bigquery",
        "clickhouse",
        "dask",
        "datafusion",
        "duckdb",
        "mysql",
        "pandas",
        "polars",
        "postgres",
        "pyspark",
        "sqlite",
        "trino",
    ],
    reason="backend does not implement arrays like snowflake",
    raises=AssertionError,
)
def test_array_discovery_snowflake(backend):
    """Verify snowflake-style array discovery: every array column is
    reported with ``json`` elements."""
    table = backend.array_types
    expected_schema = ibis.schema(
        {
            "x": dt.Array(dt.json),
            "y": dt.Array(dt.json),
            "z": dt.Array(dt.json),
            "grouper": dt.string,
            "scalar_column": dt.float64,
            "multi_dim": dt.Array(dt.json),
        }
    )
    assert table.schema() == expected_schema


@builtin_array
@pytest.mark.notyet(
["bigquery"],
Expand Down Expand Up @@ -401,9 +274,7 @@ def test_unnest_complex(backend):

@builtin_array
@pytest.mark.never(
"pyspark",
reason="pyspark throws away nulls in collect_list",
raises=AssertionError,
"pyspark", reason="pyspark throws away nulls in collect_list", raises=AssertionError
)
@pytest.mark.never(
"clickhouse",
Expand Down Expand Up @@ -775,9 +646,7 @@ def test_unnest_struct(con):

@builtin_array
@pytest.mark.never(
["impala", "mssql"],
raises=com.OperationNotDefinedError,
reason="no array support",
["impala", "mssql"], raises=com.OperationNotDefinedError, reason="no array support"
)
@pytest.mark.notimpl(
["dask", "datafusion", "druid", "oracle", "pandas", "polars", "postgres"],
Expand Down Expand Up @@ -859,9 +728,7 @@ def flatten_data():


@pytest.mark.notyet(
["bigquery"],
reason="BigQuery doesn't support arrays of arrays",
raises=TypeError,
["bigquery"], reason="BigQuery doesn't support arrays of arrays", raises=TypeError
)
@pytest.mark.notyet(
["postgres"],
Expand Down

0 comments on commit d8d622c

Please sign in to comment.