From 2ae4694e52b5a4d4e1363886a4bb29b8e48d1133 Mon Sep 17 00:00:00 2001 From: jianfengmao <jianfengmao@deephaven.io> Date: Wed, 25 Oct 2023 13:01:38 -0600 Subject: [PATCH] Fix and improve doc strings --- py/server/deephaven/pandas.py | 34 +++++++++++++---------- py/server/tests/test_pyfunc_param_null.py | 2 +- 2 files changed, 21 insertions(+), 15 deletions(-) diff --git a/py/server/deephaven/pandas.py b/py/server/deephaven/pandas.py index 06a7c234e60..416bcfbf0c5 100644 --- a/py/server/deephaven/pandas.py +++ b/py/server/deephaven/pandas.py @@ -306,20 +306,20 @@ def to_table(df: pd.DataFrame, cols: List[str] = None) -> Table: } -def _dh_null_conv_params(new_nv): +def _dh_null_conv_params(null_value): def decorator(fn): """A decorator that replaces Deephaven null values in the parameters with the provided replacement value ( either numpy.nan or pandas.NA) before calling the decorated function. Scalar parameters must be type-hinted with precise numpy dtypes in order to be converted correctly, e.g. np.int32, np.float64, etc. If the type-hint is not precise, the conversion will use the wrong Deephaven - null value and yield unintended result. if no type-hint is provided, the conversion will skip the parameter. + null value and yield unintended result. If no type-hint is provided, the conversion will skip the parameter. Array type parameters don't need to be type-hinted. Their types can be automatically detected for the conversion to be done correctly. Note that: - if new_nv is np.nan, the conversion automatically promotes arrays of integer types to array of float64. - if new_nv is pd.NA, the conversion automatically promotes arrays to pd.Series of corresponding nullable + if null_value is np.nan, the conversion automatically promotes arrays of integer types to array of float64. + if null_value is pd.NA, the conversion automatically promotes arrays to pd.Series of corresponding nullable types. """ fn_sig = _encode_signature(fn) @@ -331,18 +331,18 @@ def wrapper(*args, **kwargs): if np_dtype := _J_ARRAY_NP_TYPE_MAP.get(type(arg)): np_array = np.frombuffer(arg, dtype=np_dtype) dtype = dtypes.from_np_dtype(np_dtype) - if new_nv is np.nan: + if null_value is np.nan: dh_null = _PRIMITIVE_DTYPE_NULL_MAP.get(dtype) if dtype in (dtypes.float32, dtypes.float64): np_array = np.copy(np_array) else: np_array = np_array.astype(np.float64) - np_array[np_array == dh_null] = new_nv + np_array[np_array == dh_null] = null_value converted_args.append(np_array) - else: # new_nv is pd.NA: + else: # null_value is pd.NA: pd_series = _j_array_to_series(arg, dtype=dtype, conv_null=True) converted_args.append(pd_series) - else: + else: # scalar type or array types that don't need conversion try: np_dtype = np.dtype(np_dtype_char) except TypeError: @@ -351,7 +351,7 @@ def wrapper(*args, **kwargs): dtype = dtypes.from_np_dtype(np_dtype) dh_null = _PRIMITIVE_DTYPE_NULL_MAP.get(dtype) if dh_null is not None: - converted_args.append(new_nv if arg == dh_null else arg) + converted_args.append(null_value if arg == dh_null else arg) else: converted_args.append(arg) @@ -369,12 +369,14 @@ def dh_null_to_nan(fn): Scalar type parameters must be type-hinted with precise numpy dtypes in order to be converted correctly, e.g. np.int32, np.float64, etc. If the type-hint is not precise, the conversion will use the wrong Deephaven - null value and yield unintended result. if no type-hint is provided, the conversion will skip the parameter. + null value and yield unintended result. If no type-hint is provided, the conversion will skip the parameter. Array type parameters don't need to be type-hinted, their types can be automatically detected for the conversion be done correctly. - Note that the conversion automatically promotes arrays of integer types to array of float64. + Note: + 1. the conversion automatically promotes arrays of integer types to array of float64 + 2. copies of the original arrays are made before the conversion """ return _dh_null_conv_params(np.nan)(fn) @@ -386,12 +388,16 @@ def dh_null_to_na(fn): Scalar type parameters must be type-hinted with precise numpy dtypes in order to be converted correctly, e.g. np.int32, np.float64, etc. If the type-hint is not precise, the conversion will use the wrong Deephaven - null value and yield unintended result. if no type-hint is provided, the conversion will skip the parameter. + null value and yield unintended result. If no type-hint is provided, the conversion will skip the parameter. Array type parameters don't need to be type-hinted, their types can be automatically detected for the conversion be done correctly. - Note that the conversion automatically promotes arrays to pd.Series of corresponding nullable - types. + Note: + 1. the conversion automatically promotes arrays to pd.Series of corresponding nullable types + 2. for nullable extension types, pandas internally uses a mask array to flag the NA values in the original + numpy array + 3. copies of boolean and datetime arrays are made before the conversion + """ return _dh_null_conv_params(pd.NA)(fn) diff --git a/py/server/tests/test_pyfunc_param_null.py b/py/server/tests/test_pyfunc_param_null.py index 1b46b5871bd..cb765c51f64 100644 --- a/py/server/tests/test_pyfunc_param_null.py +++ b/py/server/tests/test_pyfunc_param_null.py @@ -88,7 +88,7 @@ def custom_func4(col: {np_type}) -> bool: res = tbl.update("Col4 = custom_func4(Col2)") self.assertEqual(4, res.to_string().count("true")) - # vectorization implicitly replaces scalar argument with Java arrays which dh_null_to_nam is able to + # vectorization implicitly replaces scalar argument with Java arrays which dh_null_to_nan is able to # infer the component type correctly @dh_null_to_nan @dh_vectorize