Skip to content

Commit

Permalink
Fix test failures and refactor code
Browse files Browse the repository at this point in the history
  • Loading branch information
jmao-denver committed Nov 26, 2023
1 parent a4016cc commit 140ba97
Show file tree
Hide file tree
Showing 7 changed files with 114 additions and 367 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -29,12 +29,13 @@ public class PyCallableWrapperJpyImpl implements PyCallableWrapper {
private static final Map<Character, Class<?>> numpyType2JavaClass = new HashMap<>();

static {
numpyType2JavaClass.put('b', byte.class);
numpyType2JavaClass.put('h', short.class);
numpyType2JavaClass.put('H', char.class);
numpyType2JavaClass.put('i', int.class);
numpyType2JavaClass.put('l', long.class);
numpyType2JavaClass.put('h', short.class);
numpyType2JavaClass.put('f', float.class);
numpyType2JavaClass.put('d', double.class);
numpyType2JavaClass.put('b', byte.class);
numpyType2JavaClass.put('?', boolean.class);
numpyType2JavaClass.put('U', String.class);
numpyType2JavaClass.put('M', Instant.class);
Expand Down
25 changes: 19 additions & 6 deletions py/server/deephaven/dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,8 @@ def __call__(self, *args, **kwargs):
"""Double-precision floating-point number type"""
string = DType(j_name="java.lang.String", qst_type=_JQstType.stringType(), np_type=np.str_)
"""String type"""
Character = DType(j_name="java.lang.Character")
"""Character type"""
BigDecimal = DType(j_name="java.math.BigDecimal")
"""Java BigDecimal type"""
StringSet = DType(j_name="io.deephaven.stringset.StringSet")
Expand Down Expand Up @@ -339,8 +341,19 @@ def from_np_dtype(np_dtype: Union[np.dtype, pd.api.extensions.ExtensionDtype]) -
return PyObject


_NUMPY_INT_TYPE_CODES = ["i", "l", "h", "b"]
_NUMPY_FLOATING_TYPE_CODES = ["f", "d"]
_NUMPY_INT_TYPE_CODES = {"b", "h", "H", "i", "l"}
_NUMPY_FLOATING_TYPE_CODES = {"f", "d"}


def _is_py_null(x: Any) -> bool:
"""Checks if the value is a Python null value, i.e. None or NaN, or Pandas.NA."""
if x is None:
return True

try:
return bool(pd.isna(x))
except (TypeError, ValueError):
return False


def _scalar(x: Any, dtype: DType) -> Any:
Expand All @@ -350,12 +363,14 @@ def _scalar(x: Any, dtype: DType) -> Any:

# NULL_BOOL will appear in Java as a byte value, which causes a cast error. We just let JPY convert it to Java null
# and let the engine's casting logic handle it.
if x is None and dtype != bool_ and _PRIMITIVE_DTYPE_NULL_MAP.get(dtype):
if _is_py_null(x) and dtype not in (bool_, char) and _PRIMITIVE_DTYPE_NULL_MAP.get(dtype):
return _PRIMITIVE_DTYPE_NULL_MAP[dtype]

try:
if hasattr(x, "dtype"):
if x.dtype.char in _NUMPY_INT_TYPE_CODES:
if x.dtype.char == 'H': # np.uint16 maps to Java char
return Character(int(x))
elif x.dtype.char in _NUMPY_INT_TYPE_CODES:
return int(x)
elif x.dtype.char in _NUMPY_FLOATING_TYPE_CODES:
return float(x)
Expand All @@ -368,8 +383,6 @@ def _scalar(x: Any, dtype: DType) -> Any:
elif x.dtype.char == 'M':
from deephaven.time import to_j_instant
return to_j_instant(x)
elif x.dtype.char == 'H': # np.uint16
return jpy.get_type("java.lang.Character")(int(x))
elif isinstance(x, (datetime.datetime, pd.Timestamp)):
from deephaven.time import to_j_instant
return to_j_instant(x)
Expand Down
31 changes: 0 additions & 31 deletions py/server/deephaven/jcompat.py
Original file line number Diff line number Diff line change
Expand Up @@ -283,34 +283,3 @@ def _j_array_to_series(dtype: DType, j_array: jpy.JType, conv_null: bool) -> pd.
s = pd.Series(data=np_array, copy=False)

return s


def _convert_udf_args(args: Tuple[Any], fn_signature: str, null_value: Literal[np.nan, pd.NA, None]) -> List[Any]:
    """Convert UDF arguments to Python-friendly values according to the function signature.

    Each argument is paired positionally with one numpy dtype character from
    ``fn_signature``.  Arguments whose type appears in ``_J_ARRAY_NP_TYPE_MAP``
    (presumably Java array types — confirm against callers) are converted to a
    pandas Series or numpy array; scalar arguments equal to the Deephaven
    primitive null sentinel for their dtype are replaced with ``null_value``.

    Args:
        args: the arguments to convert, one per signature character.
        fn_signature: a string of numpy dtype characters, one per parameter.
        null_value: the Python value substituted for nulls — np.nan, pd.NA, or None.

    Returns:
        the list of converted arguments.

    Raises:
        DHError: when an array argument's element dtype does not match the
            dtype character declared in the signature.
    """
    converted_args = []
    for arg, np_dtype_char in zip(args, fn_signature):
        if np_dtype_char == 'O':
            # 'O' (object) accepts anything; pass the argument through untouched
            converted_args.append(arg)
        elif src_np_dtype := _J_ARRAY_NP_TYPE_MAP.get(type(arg)):
            # array types: the signature must agree with the source element type,
            # unless the signature asks for plain objects
            np_dtype = np.dtype(np_dtype_char)
            if src_np_dtype != np_dtype and np_dtype != np.object_:
                raise DHError(f"Cannot convert Java array of type {src_np_dtype} to numpy array of type {np_dtype}")
            dtype = dtypes.from_np_dtype(np_dtype)
            if null_value is pd.NA:
                # pd.NA is only representable in a pandas Series, not a plain ndarray
                converted_args.append(_j_array_to_series(dtype, arg, conv_null=True))
            else: # np.nan or None
                # bool(np.nan) is True, bool(None) is False: nulls are converted
                # for np.nan and left as-is for None
                converted_args.append(_j_array_to_numpy_array(dtype, arg, conv_null=bool(null_value)))
        else: # scalar type or array types that don't need conversion
            try:
                np_dtype = np.dtype(np_dtype_char)
            except TypeError:
                # unrecognized dtype character — pass the argument through unchanged
                converted_args.append(arg)
            else:
                dtype = dtypes.from_np_dtype(np_dtype)
                if dtype is dtypes.bool_:
                    # booleans carry no primitive null sentinel; only None maps to null
                    converted_args.append(null_value if arg is None else arg)
                elif dh_null := _PRIMITIVE_DTYPE_NULL_MAP.get(dtype):
                    # replace the Deephaven primitive null sentinel with null_value
                    converted_args.append(null_value if arg == dh_null else arg)
                else:
                    converted_args.append(arg)
    return converted_args
5 changes: 2 additions & 3 deletions py/server/deephaven/numpy.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@

""" This module supports the conversion between Deephaven tables and numpy arrays. """
import re
from functools import wraps
from typing import List

import jpy
Expand All @@ -13,8 +12,8 @@
from deephaven import DHError, dtypes, new_table
from deephaven.column import Column, InputColumn
from deephaven.dtypes import DType
from deephaven.jcompat import _j_array_to_numpy_array, _convert_udf_args
from deephaven.table import Table, _encode_signature
from deephaven.jcompat import _j_array_to_numpy_array
from deephaven.table import Table

_JDataAccessHelpers = jpy.get_type("io.deephaven.engine.table.impl.DataAccessHelpers")

Expand Down
28 changes: 14 additions & 14 deletions py/server/deephaven/table.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
from enum import Enum
from enum import auto
from functools import wraps
from typing import Any, Optional, Callable, Dict, _GenericAlias, Set, Tuple
from typing import Any, Optional, Callable, Dict, _GenericAlias, Tuple
from typing import Sequence, List, Union, Protocol

import jpy
Expand All @@ -30,14 +30,14 @@
from deephaven.agg import Aggregation
from deephaven.column import Column, ColumnType
from deephaven.filters import Filter, and_, or_
from deephaven.jcompat import j_unary_operator, j_binary_operator, j_map_to_dict, j_hashmap, _convert_udf_args, \
_j_array_to_numpy_array
from deephaven.jcompat import j_unary_operator, j_binary_operator, j_map_to_dict, j_hashmap, _j_array_to_numpy_array
from deephaven.jcompat import to_sequence, j_array_list
from deephaven.time import to_np_datetime64
from deephaven.update_graph import auto_locking_ctx, UpdateGraph
from deephaven.updateby import UpdateByOperation
from deephaven.dtypes import _BUILDABLE_ARRAY_DTYPE_MAP, _scalar, _np_dtype_char, _component_np_dtype_char, DType, \
_np_ndarray_component_type, _J_ARRAY_NP_TYPE_MAP, _PRIMITIVE_DTYPE_NULL_MAP
_np_ndarray_component_type, _J_ARRAY_NP_TYPE_MAP, _PRIMITIVE_DTYPE_NULL_MAP, _NUMPY_INT_TYPE_CODES, \
_NUMPY_FLOATING_TYPE_CODES

# Table
_J_Table = jpy.get_type("io.deephaven.engine.table.Table")
Expand Down Expand Up @@ -368,7 +368,7 @@ def _j_py_script_session() -> _JPythonScriptSession:
return None


_SUPPORTED_NP_TYPE_CODES = {"b", "h", "i", "l", "f", "d", "?", "U", "M", "O"}
_SUPPORTED_NP_TYPE_CODES = {"b", "h", "H", "i", "l", "f", "d", "?", "U", "M", "O"}


@dataclass
Expand Down Expand Up @@ -444,11 +444,11 @@ def _parse_type_no_nested(annotation, p_annotation, t):
p_annotation.is_array = True
if tc in {"N", "O", "?", "U", "M"}:
p_annotation.is_none_legal = True
if tc in {"b", "h", "i", "l"}:
if tc in _NUMPY_INT_TYPE_CODES:
if p_annotation.int_char and p_annotation.int_char != tc:
raise DHError(message=f"ambiguity detected: multiple integer types in annotation: {annotation}")
p_annotation.int_char = tc
if tc in {"f", "d"}:
if tc in _NUMPY_FLOATING_TYPE_CODES:
if p_annotation.floating_char and p_annotation.floating_char != tc:
raise DHError(message=f"ambiguity detected: multiple floating types in annotation: {annotation}")
p_annotation.floating_char = tc
Expand Down Expand Up @@ -499,9 +499,9 @@ def _parse_numba_signature(fn: Union[numba.np.ufunc.gufunc.GUFunc, numba.np.ufun
for p in params:
pa = ParsedAnnotation()
pa.encoded_types.add(p)
if p in {"b", "h", "i", "l"}:
if p in _NUMPY_INT_TYPE_CODES:
pa.int_char = p
if p in {"f", "d"}:
if p in _NUMPY_FLOATING_TYPE_CODES:
pa.floating_char = p
p_annotations.append(pa)
else: # GUFunc
Expand All @@ -517,9 +517,9 @@ def _parse_numba_signature(fn: Union[numba.np.ufunc.gufunc.GUFunc, numba.np.ufun
pa.is_array = True
else:
pa.encoded_types.add(p)
if p in {"b", "h", "i", "l"}:
if p in _NUMPY_INT_TYPE_CODES:
pa.int_char = p
if p in {"f", "d"}:
if p in _NUMPY_FLOATING_TYPE_CODES:
pa.floating_char = p
p_annotations.append(pa)

Expand Down Expand Up @@ -718,9 +718,9 @@ def dh_vectorize(fn):

@wraps(fn)
def wrapper(*args):
if len(args) != len(p_sig.params) + 2:
raise ValueError(
f"The number of arguments doesn't match the function signature. {len(args) - 2}, {sig_str}")
# if len(args) != len(p_sig.params) + 2:
# raise ValueError(
# f"The number of arguments doesn't match the function signature. {len(args) - 2}, {sig_str}")
if args[0] <= 0:
raise ValueError(f"The chunk size argument must be a positive integer. {args[0]}")

Expand Down
Loading

0 comments on commit 140ba97

Please sign in to comment.