Add dictionary (mapping) as a valid argument for new_table() #4966

Merged (1 commit) on Dec 22, 2023
21 changes: 16 additions & 5 deletions py/server/deephaven/table_factory.py
@@ -5,7 +5,7 @@
""" This module provides various ways to make a Deephaven table. """

import datetime
from typing import Callable, List, Dict, Any, Union, Sequence, Tuple
from typing import Callable, List, Dict, Any, Union, Sequence, Tuple, Mapping

import jpy
import numpy as np
@@ -93,11 +93,17 @@ def time_table(period: Union[Duration, int, str, datetime.timedelta, np.timedelt
raise DHError(e, "failed to create a time table.") from e


def new_table(cols: List[InputColumn]) -> Table:
    """Creates an in-memory table from a list of input columns. Each column must have an equal number of elements.
def new_table(cols: Union[List[InputColumn], Mapping[str, Sequence]]) -> Table:
    """Creates an in-memory table from a list of input columns or a mapping (dict) of column names to column data.
    Each column must have an equal number of elements.

    When the input is a mapping, an intermediary Pandas DataFrame is created from the mapping and then converted
    to an in-memory table. In this case, as opposed to when the input is a list of InputColumns, the column types
    are determined by Pandas' type inference logic.

    Args:
        cols (List[InputColumn]): a list of InputColumn
        cols (Union[List[InputColumn], Mapping[str, Sequence]]): a list of InputColumns or a mapping of column
            names to column data.

    Returns:
        a Table
@@ -106,7 +112,12 @@ def new_table(cols: List[InputColumn]) -> Table:
        DHError
    """
    try:
        return Table(j_table=_JTableFactory.newTable(*[col.j_column for col in cols]))
        if isinstance(cols, list):
            return Table(j_table=_JTableFactory.newTable(*[col.j_column for col in cols]))
        else:
            from deephaven.pandas import to_table
            df = pd.DataFrame(cols).convert_dtypes()
            return to_table(df)
    except Exception as e:
raise DHError(e, "failed to create a new time table.") from e

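For reference, a minimal usage sketch of the two call forms new_table() accepts after this change. The column names and values are illustrative only, and the imports assume the standard deephaven and deephaven.column helpers (new_table, int_col, string_col).

# Usage sketch: both call forms of new_table() shown in the diff above.
from deephaven import new_table
from deephaven.column import int_col, string_col

# Existing form: a list of InputColumns with explicit column types.
t1 = new_table([int_col("Id", [1, 2]), string_col("Name", ["foo", "bar"])])

# Form added by this PR: a mapping of column names to column data; column
# types are inferred by Pandas via an intermediary DataFrame (see docstring above).
t2 = new_table({"Id": [1, 2], "Name": ["foo", "bar"]})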
65 changes: 63 additions & 2 deletions py/server/tests/test_table_factory.py
@@ -136,8 +136,68 @@ def test_new_table(self):
jobj_col(name="JObj", data=[jobj1, jobj2]),
]

t = new_table(cols=cols)
self.assertEqual(t.size, 2)
    def test_new_table_dict(self):
        jobj1 = JArrayList()
        jobj1.add(1)
        jobj1.add(-1)
        jobj2 = JArrayList()
        jobj2.add(2)
        jobj2.add(-2)
        bool_cols = {
            "Boolean": [True, False],
        }
        integer_cols = {
            "Byte": (1, -1),
            "Short": [1, -1],
            "Int": [1, -1],
            "Long": [1, -1],
        }
        float_cols = {
            "Float": [1.01, -1.01],
            "Double": [1.01, -1.01],
        }
        string_cols = {
            "String": np.array(["foo", "bar"]),
        }
        datetime_cols = {
            "Datetime": np.array([1, -1], dtype=np.dtype("datetime64[ns]"))
        }

        obj_cols = {
            "PyObj": [CustomClass(1, "1"), CustomClass(-1, "-1")],
            "PyObj1": [[1, 2, 3], CustomClass(-1, "-1")],
            "PyObj2": [False, 'False'],
            "JObj": [jobj1, jobj2],
        }

        dtype_cols_map = {
            dtypes.bool_: bool_cols,
            dtypes.int64: integer_cols,
            dtypes.float64: float_cols,
            dtypes.string: string_cols,
            dtypes.Instant: datetime_cols,
            dtypes.PyObject: obj_cols
        }

        for dtype, cols in dtype_cols_map.items():
            with self.subTest(f"Testing {dtype}"):
                t = new_table(cols=cols)
                self.assertEqual(t.size, 2)
                for c in t.columns:
                    self.assertEqual(c.data_type, dtype)

        dtype_np_cols_map = {
            dtypes.int8: np.array([1, -1], dtype=np.int8),
            dtypes.int16: np.array([1, -1], dtype=np.int16),
            dtypes.int32: np.array([1, -1], dtype=np.int32),
            dtypes.int64: np.array([1, -1], dtype=np.int64),
            dtypes.float32: np.array([1.01, -1.01], dtype=np.float32),
            dtypes.float64: np.array([1.01, -1.01], dtype=np.float64),
        }
        d_cols = {dtype.j_name.capitalize(): cols for dtype, cols in dtype_np_cols_map.items()}
        t = new_table(cols=d_cols)
        for tc, dtype in zip(t.columns, dtype_np_cols_map.keys()):
            self.assertEqual(tc.data_type, dtype)

    def test_new_table_nulls(self):
        null_cols = [
@@ -155,6 +215,7 @@ def test_new_table_nulls(self):
        t = new_table(cols=null_cols)
        self.assertEqual(t.to_string().count("null"), len(null_cols))


    def test_input_column_error(self):
        j_al = JArrayList()

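A small sketch of the conversion path the new mapping branch takes, mirroring the implementation in the table_factory.py diff above (pd.DataFrame(...).convert_dtypes() followed by deephaven.pandas.to_table). The resulting Deephaven column types depend on Pandas' type inference; the long/double outcome noted in the comments matches the dtypes.int64 and dtypes.float64 assertions in test_new_table_dict, but it is not guaranteed for other inputs.

# Sketch of the mapping branch of new_table(), as implemented in the diff above.
import pandas as pd
from deephaven.pandas import to_table

cols = {"Int": [1, -1], "Double": [1.01, -1.01]}

# convert_dtypes() lets Pandas pick nullable extension dtypes (e.g. Int64, Float64),
# which to_table() then maps onto Deephaven column types (long and double here, per
# the assertions in test_new_table_dict).
df = pd.DataFrame(cols).convert_dtypes()
t = to_table(df)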