Skip to content

Commit

Permalink
Fix test failures and refactor code
Browse files Browse the repository at this point in the history
  • Loading branch information
jmao-denver committed Nov 26, 2023
1 parent a4016cc commit 140ba97
Show file tree
Hide file tree
Showing 7 changed files with 114 additions and 367 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -29,12 +29,13 @@ public class PyCallableWrapperJpyImpl implements PyCallableWrapper {
private static final Map<Character, Class<?>> numpyType2JavaClass = new HashMap<>();

static {
numpyType2JavaClass.put('b', byte.class);
numpyType2JavaClass.put('h', short.class);
numpyType2JavaClass.put('H', char.class);
numpyType2JavaClass.put('i', int.class);
numpyType2JavaClass.put('l', long.class);
numpyType2JavaClass.put('h', short.class);
numpyType2JavaClass.put('f', float.class);
numpyType2JavaClass.put('d', double.class);
numpyType2JavaClass.put('b', byte.class);
numpyType2JavaClass.put('?', boolean.class);
numpyType2JavaClass.put('U', String.class);
numpyType2JavaClass.put('M', Instant.class);
Expand Down
25 changes: 19 additions & 6 deletions py/server/deephaven/dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,8 @@ def __call__(self, *args, **kwargs):
"""Double-precision floating-point number type"""
string = DType(j_name="java.lang.String", qst_type=_JQstType.stringType(), np_type=np.str_)
"""String type"""
Character = DType(j_name="java.lang.Character")
"""Character type"""
BigDecimal = DType(j_name="java.math.BigDecimal")
"""Java BigDecimal type"""
StringSet = DType(j_name="io.deephaven.stringset.StringSet")
Expand Down Expand Up @@ -339,8 +341,19 @@ def from_np_dtype(np_dtype: Union[np.dtype, pd.api.extensions.ExtensionDtype]) -
return PyObject


_NUMPY_INT_TYPE_CODES = ["i", "l", "h", "b"]
_NUMPY_FLOATING_TYPE_CODES = ["f", "d"]
_NUMPY_INT_TYPE_CODES = {"b", "h", "H", "i", "l"}
_NUMPY_FLOATING_TYPE_CODES = {"f", "d"}


def _is_py_null(x: Any) -> bool:
"""Checks if the value is a Python null value, i.e. None or NaN, or Pandas.NA."""
if x is None:
return True

try:
return bool(pd.isna(x))
except (TypeError, ValueError):
return False


def _scalar(x: Any, dtype: DType) -> Any:
Expand All @@ -350,12 +363,14 @@ def _scalar(x: Any, dtype: DType) -> Any:

# NULL_BOOL will appear in Java as a byte value, which causes a cast error. We just let JPY convert it to Java null
# and let the engine's casting logic handle it.
if x is None and dtype != bool_ and _PRIMITIVE_DTYPE_NULL_MAP.get(dtype):
if _is_py_null(x) and dtype not in (bool_, char) and _PRIMITIVE_DTYPE_NULL_MAP.get(dtype):
return _PRIMITIVE_DTYPE_NULL_MAP[dtype]

try:
if hasattr(x, "dtype"):
if x.dtype.char in _NUMPY_INT_TYPE_CODES:
if x.dtype.char == 'H': # np.uint16 maps to Java char
return Character(int(x))
elif x.dtype.char in _NUMPY_INT_TYPE_CODES:
return int(x)
elif x.dtype.char in _NUMPY_FLOATING_TYPE_CODES:
return float(x)
Expand All @@ -368,8 +383,6 @@ def _scalar(x: Any, dtype: DType) -> Any:
elif x.dtype.char == 'M':
from deephaven.time import to_j_instant
return to_j_instant(x)
elif x.dtype.char == 'H': # np.uint16
return jpy.get_type("java.lang.Character")(int(x))
elif isinstance(x, (datetime.datetime, pd.Timestamp)):
from deephaven.time import to_j_instant
return to_j_instant(x)
Expand Down
31 changes: 0 additions & 31 deletions py/server/deephaven/jcompat.py
Original file line number Diff line number Diff line change
Expand Up @@ -283,34 +283,3 @@ def _j_array_to_series(dtype: DType, j_array: jpy.JType, conv_null: bool) -> pd.
s = pd.Series(data=np_array, copy=False)

return s


def _convert_udf_args(args: Tuple[Any], fn_signature: str, null_value: Literal[np.nan, pd.NA, None]) -> List[Any]:
    """Convert UDF arguments to Python-friendly values according to the function signature.

    Each argument is paired positionally with one numpy dtype character from
    ``fn_signature``.  Arguments whose type appears in ``_J_ARRAY_NP_TYPE_MAP``
    (presumably Java array types — confirm against callers) are converted to a
    pandas Series or numpy array; scalar arguments equal to the Deephaven
    primitive null sentinel for their dtype are replaced with ``null_value``.

    Args:
        args: the arguments to convert, one per signature character.
        fn_signature: a string of numpy dtype characters, one per parameter.
        null_value: the Python value substituted for nulls — np.nan, pd.NA, or None.

    Returns:
        the list of converted arguments.

    Raises:
        DHError: when an array argument's element dtype does not match the
            dtype character declared in the signature.
    """
    converted_args = []
    for arg, np_dtype_char in zip(args, fn_signature):
        if np_dtype_char == 'O':
            # 'O' (object) accepts anything; pass the argument through untouched
            converted_args.append(arg)
        elif src_np_dtype := _J_ARRAY_NP_TYPE_MAP.get(type(arg)):
            # array types: the signature must agree with the source element type,
            # unless the signature asks for plain objects
            np_dtype = np.dtype(np_dtype_char)
            if src_np_dtype != np_dtype and np_dtype != np.object_:
                raise DHError(f"Cannot convert Java array of type {src_np_dtype} to numpy array of type {np_dtype}")
            dtype = dtypes.from_np_dtype(np_dtype)
            if null_value is pd.NA:
                # pd.NA is only representable in a pandas Series, not a plain ndarray
                converted_args.append(_j_array_to_series(dtype, arg, conv_null=True))
            else: # np.nan or None
                # bool(np.nan) is True, bool(None) is False: nulls are converted
                # for np.nan and left as-is for None
                converted_args.append(_j_array_to_numpy_array(dtype, arg, conv_null=bool(null_value)))
        else: # scalar type or array types that don't need conversion
            try:
                np_dtype = np.dtype(np_dtype_char)
            except TypeError:
                # unrecognized dtype character — pass the argument through unchanged
                converted_args.append(arg)
            else:
                dtype = dtypes.from_np_dtype(np_dtype)
                if dtype is dtypes.bool_:
                    # booleans carry no primitive null sentinel; only None maps to null
                    converted_args.append(null_value if arg is None else arg)
                elif dh_null := _PRIMITIVE_DTYPE_NULL_MAP.get(dtype):
                    # replace the Deephaven primitive null sentinel with null_value
                    converted_args.append(null_value if arg == dh_null else arg)
                else:
                    converted_args.append(arg)
    return converted_args
5 changes: 2 additions & 3 deletions py/server/deephaven/numpy.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@

""" This module supports the conversion between Deephaven tables and numpy arrays. """
import re
from functools import wraps
from typing import List

import jpy
Expand All @@ -13,8 +12,8 @@
from deephaven import DHError, dtypes, new_table
from deephaven.column import Column, InputColumn
from deephaven.dtypes import DType
from deephaven.jcompat import _j_array_to_numpy_array, _convert_udf_args
from deephaven.table import Table, _encode_signature
from deephaven.jcompat import _j_array_to_numpy_array
from deephaven.table import Table

_JDataAccessHelpers = jpy.get_type("io.deephaven.engine.table.impl.DataAccessHelpers")

Expand Down
28 changes: 14 additions & 14 deletions py/server/deephaven/table.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
from enum import Enum
from enum import auto
from functools import wraps
from typing import Any, Optional, Callable, Dict, _GenericAlias, Set, Tuple
from typing import Any, Optional, Callable, Dict, _GenericAlias, Tuple
from typing import Sequence, List, Union, Protocol

import jpy
Expand All @@ -30,14 +30,14 @@
from deephaven.agg import Aggregation
from deephaven.column import Column, ColumnType
from deephaven.filters import Filter, and_, or_
from deephaven.jcompat import j_unary_operator, j_binary_operator, j_map_to_dict, j_hashmap, _convert_udf_args, \
_j_array_to_numpy_array
from deephaven.jcompat import j_unary_operator, j_binary_operator, j_map_to_dict, j_hashmap, _j_array_to_numpy_array
from deephaven.jcompat import to_sequence, j_array_list
from deephaven.time import to_np_datetime64
from deephaven.update_graph import auto_locking_ctx, UpdateGraph
from deephaven.updateby import UpdateByOperation
from deephaven.dtypes import _BUILDABLE_ARRAY_DTYPE_MAP, _scalar, _np_dtype_char, _component_np_dtype_char, DType, \
_np_ndarray_component_type, _J_ARRAY_NP_TYPE_MAP, _PRIMITIVE_DTYPE_NULL_MAP
_np_ndarray_component_type, _J_ARRAY_NP_TYPE_MAP, _PRIMITIVE_DTYPE_NULL_MAP, _NUMPY_INT_TYPE_CODES, \
_NUMPY_FLOATING_TYPE_CODES

# Table
_J_Table = jpy.get_type("io.deephaven.engine.table.Table")
Expand Down Expand Up @@ -368,7 +368,7 @@ def _j_py_script_session() -> _JPythonScriptSession:
return None


_SUPPORTED_NP_TYPE_CODES = {"b", "h", "i", "l", "f", "d", "?", "U", "M", "O"}
_SUPPORTED_NP_TYPE_CODES = {"b", "h", "H", "i", "l", "f", "d", "?", "U", "M", "O"}


@dataclass
Expand Down Expand Up @@ -444,11 +444,11 @@ def _parse_type_no_nested(annotation, p_annotation, t):
p_annotation.is_array = True
if tc in {"N", "O", "?", "U", "M"}:
p_annotation.is_none_legal = True
if tc in {"b", "h", "i", "l"}:
if tc in _NUMPY_INT_TYPE_CODES:
if p_annotation.int_char and p_annotation.int_char != tc:
raise DHError(message=f"ambiguity detected: multiple integer types in annotation: {annotation}")
p_annotation.int_char = tc
if tc in {"f", "d"}:
if tc in _NUMPY_FLOATING_TYPE_CODES:
if p_annotation.floating_char and p_annotation.floating_char != tc:
raise DHError(message=f"ambiguity detected: multiple floating types in annotation: {annotation}")
p_annotation.floating_char = tc
Expand Down Expand Up @@ -499,9 +499,9 @@ def _parse_numba_signature(fn: Union[numba.np.ufunc.gufunc.GUFunc, numba.np.ufun
for p in params:
pa = ParsedAnnotation()
pa.encoded_types.add(p)
if p in {"b", "h", "i", "l"}:
if p in _NUMPY_INT_TYPE_CODES:
pa.int_char = p
if p in {"f", "d"}:
if p in _NUMPY_FLOATING_TYPE_CODES:
pa.floating_char = p
p_annotations.append(pa)
else: # GUFunc
Expand All @@ -517,9 +517,9 @@ def _parse_numba_signature(fn: Union[numba.np.ufunc.gufunc.GUFunc, numba.np.ufun
pa.is_array = True
else:
pa.encoded_types.add(p)
if p in {"b", "h", "i", "l"}:
if p in _NUMPY_INT_TYPE_CODES:
pa.int_char = p
if p in {"f", "d"}:
if p in _NUMPY_FLOATING_TYPE_CODES:
pa.floating_char = p
p_annotations.append(pa)

Expand Down Expand Up @@ -718,9 +718,9 @@ def dh_vectorize(fn):

@wraps(fn)
def wrapper(*args):
if len(args) != len(p_sig.params) + 2:
raise ValueError(
f"The number of arguments doesn't match the function signature. {len(args) - 2}, {sig_str}")
# if len(args) != len(p_sig.params) + 2:
# raise ValueError(
# f"The number of arguments doesn't match the function signature. {len(args) - 2}, {sig_str}")
if args[0] <= 0:
raise ValueError(f"The chunk size argument must be a positive integer. {args[0]}")

Expand Down
Loading

0 comments on commit 140ba97

Please sign in to comment.