CLN: Add/refine type hints to some functions in core.dtypes.cast #33286

Merged · 3 commits · Apr 5, 2020

20 changes: 13 additions & 7 deletions pandas/core/dtypes/cast.py
@@ -3,6 +3,7 @@
"""

from datetime import date, datetime, timedelta
from typing import TYPE_CHECKING, Type

import numpy as np

@@ -63,13 +64,18 @@
ABCDataFrame,
ABCDatetimeArray,
ABCDatetimeIndex,
ABCExtensionArray,
ABCPeriodArray,
ABCPeriodIndex,
ABCSeries,
)
from pandas.core.dtypes.inference import is_list_like
from pandas.core.dtypes.missing import isna, notna

if TYPE_CHECKING:
from pandas import Series
from pandas.core.arrays import ExtensionArray # noqa: F401

_int8_max = np.iinfo(np.int8).max
_int16_max = np.iinfo(np.int16).max
_int32_max = np.iinfo(np.int32).max
@@ -246,18 +252,16 @@ def trans(x):
return result


def maybe_cast_result(
result, obj: ABCSeries, numeric_only: bool = False, how: str = ""
):
def maybe_cast_result(result, obj: "Series", numeric_only: bool = False, how: str = ""):
"""
Try casting result to a different type if appropriate

Parameters
----------
result : array-like
Result to cast.
obj : ABCSeries
Input series from which result was calculated.
obj : Series
Input Series from which result was calculated.
numeric_only : bool, default False
Whether to cast only numerics or datetimes as well.
how : str, default ""
@@ -313,13 +317,13 @@ def maybe_cast_result_dtype(dtype: DtypeObj, how: str) -> DtypeObj:
return d.get((dtype, how), dtype)


def maybe_cast_to_extension_array(cls, obj, dtype=None):
def maybe_cast_to_extension_array(cls: Type["ExtensionArray"], obj, dtype=None):
"""
Call to `_from_sequence` that returns the object unchanged on Exception.

Parameters
----------
cls : ExtensionArray subclass
cls : class, subclass of ExtensionArray
obj : arraylike
Values to pass to cls._from_sequence
dtype : ExtensionDtype, optional
@@ -329,6 +333,8 @@ def maybe_cast_to_extension_array(cls, obj, dtype=None):
ExtensionArray or obj
"""
assert isinstance(cls, type), f"must pass a type: {cls}"
assertion_msg = f"must pass a subclass of ExtensionArray: {cls}"
assert issubclass(cls, ABCExtensionArray), assertion_msg
Member Author:
Is the assert here still necessary, or does the type hint now make this redundant?

Member Author:
@simonjayhawkins Would like your thoughts on this.

Member:
The type hint is sufficient for static checking (so that mypy doesn't report any errors). For the assert, it depends on the desired runtime behaviour.

try:
result = cls._from_sequence(obj, dtype=dtype)
except Exception:
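Aside (not part of the diff): the review thread above draws a line between static checking and runtime behaviour. A minimal sketch of how the new Type["ExtensionArray"] hint and the assert complement each other, assuming the pandas internals shown in this diff (circa pandas 1.0):

# Sketch only, not part of this PR. mypy can reject a non-EA class statically
# thanks to the ``Type["ExtensionArray"]`` annotation, while the assert turns
# the same misuse into an immediate AssertionError for un-type-checked callers.
from pandas.arrays import IntegerArray
from pandas.core.dtypes.cast import maybe_cast_to_extension_array

# OK: IntegerArray is an ExtensionArray subclass, so both checks pass and the
# list is handed to IntegerArray._from_sequence.
arr = maybe_cast_to_extension_array(IntegerArray, [1, 2, None])

# Not OK: mypy flags the argument type; without static checking, the assert
# raises at runtime instead of the helper silently returning ``obj``.
try:
    maybe_cast_to_extension_array(list, [1, 2, 3])
except AssertionError as err:
    print(err)  # must pass a subclass of ExtensionArray: <class 'list'>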
6 changes: 3 additions & 3 deletions pandas/core/groupby/generic.py
@@ -151,7 +151,7 @@ def pinner(cls):


@pin_whitelisted_properties(Series, base.series_apply_whitelist)
class SeriesGroupBy(GroupBy):
class SeriesGroupBy(GroupBy[Series]):
_apply_whitelist = base.series_apply_whitelist

def _iterate_slices(self) -> Iterable[Series]:
@@ -815,7 +815,7 @@ def pct_change(self, periods=1, fill_method="pad", limit=None, freq=None):


@pin_whitelisted_properties(DataFrame, base.dataframe_apply_whitelist)
class DataFrameGroupBy(GroupBy):
class DataFrameGroupBy(GroupBy[DataFrame]):

_apply_whitelist = base.dataframe_apply_whitelist

@@ -1462,7 +1462,7 @@ def _transform_fast(self, result: DataFrame, func_nm: str) -> DataFrame:
for i, _ in enumerate(result.columns):
res = algorithms.take_1d(result.iloc[:, i].values, ids)
# TODO: we have no test cases that get here with EA dtypes;
# try_cast may not be needed if EAs never get here
# maybe_cast_result may not be needed if EAs never get here
if cast:
res = maybe_cast_result(res, obj.iloc[:, i], how=func_nm)
output.append(res)
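Aside (not part of the diff): parametrizing the subclasses as GroupBy[Series] and GroupBy[DataFrame] works because GroupBy itself becomes generic over FrameOrSeries in groupby.py below. A standalone sketch of the pattern, using stand-in classes rather than the real pandas ones (the real FrameOrSeries TypeVar lives in pandas._typing and is likewise bound to NDFrame):

# Stand-in classes for illustration only.
from typing import Generic, TypeVar

class NDFrame: ...
class Series(NDFrame): ...
class DataFrame(NDFrame): ...

FrameOrSeries = TypeVar("FrameOrSeries", bound=NDFrame)

class GroupBy(Generic[FrameOrSeries]):
    def __init__(self, obj: FrameOrSeries) -> None:
        # ``obj`` is typed as the bound parameter, not as a bare NDFrame.
        self.obj = obj

class SeriesGroupBy(GroupBy[Series]): ...
class DataFrameGroupBy(GroupBy[DataFrame]): ...

# Under mypy, SeriesGroupBy(Series()).obj is a Series and
# DataFrameGroupBy(DataFrame()).obj is a DataFrame, rather than a bare NDFrame.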
16 changes: 11 additions & 5 deletions pandas/core/groupby/groupby.py
@@ -17,13 +17,15 @@ class providing the base-class of operations.
Callable,
Dict,
FrozenSet,
Generic,
Hashable,
Iterable,
List,
Mapping,
Optional,
Tuple,
Type,
TypeVar,
Union,
)

@@ -353,13 +355,13 @@ def _group_selection_context(groupby):
]


class _GroupBy(PandasObject, SelectionMixin):
class _GroupBy(PandasObject, SelectionMixin, Generic[FrameOrSeries]):
_group_selection = None
_apply_whitelist: FrozenSet[str] = frozenset()

def __init__(
self,
obj: NDFrame,
obj: FrameOrSeries,
keys: Optional[_KeysArgType] = None,
axis: int = 0,
level=None,
Expand Down Expand Up @@ -995,7 +997,11 @@ def _apply_filter(self, indices, dropna):
return filtered


class GroupBy(_GroupBy):
# To track operations that expand dimensions, like ohlc
OutputFrameOrSeries = TypeVar("OutputFrameOrSeries", bound=NDFrame)


class GroupBy(_GroupBy[FrameOrSeries]):
"""
Class for grouping and aggregating relational data.

@@ -2420,8 +2426,8 @@ def tail(self, n=5):
return self._selected_obj[mask]

def _reindex_output(
self, output: FrameOrSeries, fill_value: Scalar = np.NaN
) -> FrameOrSeries:
self, output: OutputFrameOrSeries, fill_value: Scalar = np.NaN
) -> OutputFrameOrSeries:
"""
If we have categorical groupers, then we might want to make sure that
we have a fully re-indexed output to the levels. This means expanding
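Aside (not part of the diff): _reindex_output gets its own TypeVar because the object it receives is not always the class-level FrameOrSeries. A SeriesGroupBy is a GroupBy[Series], yet a dimension-expanding operation such as ohlc hands it a DataFrame, which a quick check confirms:

# ohlc() on a SeriesGroupBy returns a DataFrame, so annotating _reindex_output
# with the class-level FrameOrSeries (Series here) would be too narrow; the
# separate OutputFrameOrSeries TypeVar keeps the argument and return types
# matched without tying them to the class parameter.
import pandas as pd

s = pd.Series([1.0, 2.0, 3.0, 4.0], index=pd.date_range("2020-01-01", periods=4))
grouped = s.groupby(s.index.year)   # a SeriesGroupBy, i.e. GroupBy[Series]
print(type(grouped.ohlc()))         # <class 'pandas.core.frame.DataFrame'>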
2 changes: 1 addition & 1 deletion pandas/core/groupby/ops.py
@@ -682,7 +682,7 @@ def _aggregate_series_pure_python(self, obj: Series, func):

assert result is not None
result = lib.maybe_convert_objects(result, try_float=0)
# TODO: try_cast back to EA?
# TODO: maybe_cast_to_extension_array?

return result, counts
