diff --git a/pyproject.toml b/pyproject.toml index c4f10f2e19f..a0ce37a73d9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -95,25 +95,33 @@ select = [ "UP035", # usage of legacy `np.random` function calls "NPY002", + # Ruff-specific rules + "RUF", ] ignore = [ # whitespace before : "E203", # line-too-long (due to Copyright header) "E501", + # String contains ambiguous character + "RUF001", + # Parenthesize `a and b` expressions when chaining `and` and `or` + # together, to make the precedence clear + "RUF021", + # Mutable class attributes should be annotated with + # `typing.ClassVar` + "RUF012", ] fixable = ["ALL"] exclude = [ - # TODO: Remove this in a follow-up where we fix __all__. - "__init__.py", - # TODO: https://github.com/rapidsai/cudf/issues/17461 - "**/*.ipynb", + # TODO: https://github.com/rapidsai/cudf/issues/17461 + "**/*.ipynb", ] [tool.ruff.format] exclude = [ - # TODO: https://github.com/rapidsai/cudf/issues/17461 - "**/*.ipynb", + # TODO: https://github.com/rapidsai/cudf/issues/17461 + "**/*.ipynb", ] [tool.ruff.lint.per-file-ignores] diff --git a/python/cudf/benchmarks/common/config.py b/python/cudf/benchmarks/common/config.py index c1e9d4d6116..872ba424d20 100644 --- a/python/cudf/benchmarks/common/config.py +++ b/python/cudf/benchmarks/common/config.py @@ -42,9 +42,9 @@ def pytest_collection_modifyitems(session, config, items): items[:] = list(filter(is_pandas_compatible, items)) else: - import cupy # noqa: W0611, F401 + import cupy # noqa: F401 - import cudf # noqa: W0611, F401 + import cudf # noqa: F401 def pytest_collection_modifyitems(session, config, items): pass diff --git a/python/cudf/benchmarks/conftest.py b/python/cudf/benchmarks/conftest.py index 0e4afadccf5..24ff211387c 100644 --- a/python/cudf/benchmarks/conftest.py +++ b/python/cudf/benchmarks/conftest.py @@ -56,18 +56,16 @@ # into the main repo. sys.path.insert(0, os.path.join(os.path.dirname(__file__), "common")) -# Turn off isort until we upgrade to 5.8.0 -# https://github.com/pycqa/isort/issues/1594 -from config import ( # noqa: W0611, E402, F401 +from config import ( NUM_COLS, NUM_ROWS, - collect_ignore, - cudf, # noqa: W0611, E402, F401 - pytest_collection_modifyitems, - pytest_sessionfinish, - pytest_sessionstart, + collect_ignore, # noqa: F401 + cudf, + pytest_collection_modifyitems, # noqa: F401 + pytest_sessionfinish, # noqa: F401 + pytest_sessionstart, # noqa: F401 ) -from utils import ( # noqa: E402 +from utils import ( OrderedSet, collapse_fixtures, column_generators, diff --git a/python/cudf/cudf/__init__.py b/python/cudf/cudf/__init__.py index 99b759e2166..843f2670b4d 100644 --- a/python/cudf/cudf/__init__.py +++ b/python/cudf/cudf/__init__.py @@ -99,6 +99,7 @@ __all__ = [ + "NA", "BaseIndex", "CategoricalDtype", "CategoricalIndex", @@ -114,7 +115,6 @@ "IntervalIndex", "ListDtype", "MultiIndex", - "NA", "NaT", "RangeIndex", "Scalar", diff --git a/python/cudf/cudf/_fuzz_testing/fuzzer.py b/python/cudf/cudf/_fuzz_testing/fuzzer.py index ee1b2c1f1c4..4b080937a17 100644 --- a/python/cudf/cudf/_fuzz_testing/fuzzer.py +++ b/python/cudf/cudf/_fuzz_testing/fuzzer.py @@ -95,7 +95,7 @@ def start(self): else: self._data_handler.set_rand_params(self.params) kwargs = self._data_handler._current_params["test_kwargs"] - logging.info(f"Parameters passed: {str(kwargs)}") + logging.info(f"Parameters passed: {kwargs!s}") self._target(file_name, **kwargs) except KeyboardInterrupt: logging.info( diff --git a/python/cudf/cudf/core/_base_index.py b/python/cudf/cudf/core/_base_index.py index dfa3ef0df92..2df154ee112 100644 --- a/python/cudf/cudf/core/_base_index.py +++ b/python/cudf/cudf/core/_base_index.py @@ -133,7 +133,7 @@ def memory_usage(self, deep=False): """ raise NotImplementedError - def tolist(self): # noqa: D102 + def tolist(self): raise TypeError( "cuDF does not support conversion to host memory " "via the `tolist()` method. Consider using " @@ -148,7 +148,7 @@ def name(self): raise NotImplementedError @property # type: ignore - def ndim(self) -> int: # noqa: D401 + def ndim(self) -> int: """Number of dimensions of the underlying data, by definition 1.""" return 1 @@ -265,7 +265,7 @@ def get_loc(self, key): slice(1, 3, None) >>> multi_index.get_loc(('b', 'e')) 1 - """ # noqa: E501 + """ def max(self): """The maximum value of the index.""" diff --git a/python/cudf/cudf/core/buffer/spill_manager.py b/python/cudf/cudf/core/buffer/spill_manager.py index ed351a6b107..07d0d698cb8 100644 --- a/python/cudf/cudf/core/buffer/spill_manager.py +++ b/python/cudf/cudf/core/buffer/spill_manager.py @@ -54,7 +54,7 @@ def get_rmm_memory_resource_stack( """ if hasattr(mr, "upstream_mr"): - return [mr] + get_rmm_memory_resource_stack(mr.upstream_mr) + return [mr, *get_rmm_memory_resource_stack(mr.upstream_mr)] return [mr] @@ -275,7 +275,7 @@ def _out_of_memory_handle(self, nbytes: int, *, retry_once=True) -> bool: print( f"[WARNING] RMM allocation of {format_bytes(nbytes)} bytes " "failed, spill-on-demand couldn't find any device memory to " - f"spill:\n{repr(self)}\ntraceback:\n{get_traceback()}\n" + f"spill:\n{self!r}\ntraceback:\n{get_traceback()}\n" f"{self.statistics}" ) return False # Since we didn't find anything to spill, we give up diff --git a/python/cudf/cudf/core/buffer/spillable_buffer.py b/python/cudf/cudf/core/buffer/spillable_buffer.py index b40c56c9a6b..7305ff651c6 100644 --- a/python/cudf/cudf/core/buffer/spillable_buffer.py +++ b/python/cudf/cudf/core/buffer/spillable_buffer.py @@ -366,7 +366,7 @@ def __str__(self) -> str: f"<{self.__class__.__name__} size={format_bytes(self._size)} " f"spillable={self.spillable} exposed={self.exposed} " f"num-spill-locks={len(self._spill_locks)} " - f"ptr={ptr_info} owner={repr(self._owner)}>" + f"ptr={ptr_info} owner={self._owner!r}>" ) diff --git a/python/cudf/cudf/core/column/__init__.py b/python/cudf/cudf/core/column/__init__.py index a1e87d04bc9..7e8523c5277 100644 --- a/python/cudf/cudf/core/column/__init__.py +++ b/python/cudf/cudf/core/column/__init__.py @@ -15,17 +15,19 @@ deserialize_columns, serialize_columns, ) -from cudf.core.column.datetime import DatetimeColumn # noqa: F401 -from cudf.core.column.datetime import DatetimeTZColumn # noqa: F401 -from cudf.core.column.lists import ListColumn # noqa: F401 -from cudf.core.column.numerical import NumericalColumn # noqa: F401 -from cudf.core.column.string import StringColumn # noqa: F401 -from cudf.core.column.struct import StructColumn # noqa: F401 -from cudf.core.column.timedelta import TimeDeltaColumn # noqa: F401 -from cudf.core.column.interval import IntervalColumn # noqa: F401 -from cudf.core.column.decimal import ( # noqa: F401 +from cudf.core.column.datetime import ( + DatetimeColumn, + DatetimeTZColumn, +) +from cudf.core.column.decimal import ( Decimal32Column, Decimal64Column, Decimal128Column, DecimalBaseColumn, ) +from cudf.core.column.interval import IntervalColumn +from cudf.core.column.lists import ListColumn +from cudf.core.column.numerical import NumericalColumn +from cudf.core.column.string import StringColumn +from cudf.core.column.struct import StructColumn +from cudf.core.column.timedelta import TimeDeltaColumn diff --git a/python/cudf/cudf/core/column/decimal.py b/python/cudf/cudf/core/column/decimal.py index ce7aa91f775..bb923b8caa2 100644 --- a/python/cudf/cudf/core/column/decimal.py +++ b/python/cudf/cudf/core/column/decimal.py @@ -435,7 +435,7 @@ def _get_decimal_type( `op` for the given dtypes. For precision & scale calculations see : https://docs.microsoft.com/en-us/sql/t-sql/data-types/precision-scale-and-length-transact-sql - """ # noqa: E501 + """ # This should at some point be hooked up to libcudf's # binary_operation_fixed_point_scale @@ -506,8 +506,8 @@ def _get_decimal_type( # if we've reached this point, we cannot create a decimal type without # overflow; raise an informative error raise ValueError( - f"Performing {op} between columns of type {repr(lhs_dtype)} and " - f"{repr(rhs_dtype)} would result in overflow" + f"Performing {op} between columns of type {lhs_dtype!r} and " + f"{rhs_dtype!r} would result in overflow" ) diff --git a/python/cudf/cudf/core/column/numerical.py b/python/cudf/cudf/core/column/numerical.py index a7538c1c947..3a569832e8f 100644 --- a/python/cudf/cudf/core/column/numerical.py +++ b/python/cudf/cudf/core/column/numerical.py @@ -226,7 +226,7 @@ def _binaryop(self, other: ColumnBinaryOperand, op: str) -> ColumnBase: # If `other` is a Python integer and it is out-of-bounds # promotion could fail but we can trivially define the result # in terms of `notnull` or `NULL_NOT_EQUALS`. - if type(other) is int and self.dtype.kind in "iu": # noqa: E721 + if type(other) is int and self.dtype.kind in "iu": truthiness = None iinfo = np.iinfo(self.dtype) if iinfo.min > other: diff --git a/python/cudf/cudf/core/column/string.py b/python/cudf/cudf/core/column/string.py index 916fecebf56..8f466b0739b 100644 --- a/python/cudf/cudf/core/column/string.py +++ b/python/cudf/cudf/core/column/string.py @@ -548,7 +548,7 @@ def join( 2 3 c-d dtype: object - """ # noqa E501 + """ if sep is None: sep = "" @@ -694,7 +694,7 @@ def extract( The `flags` parameter currently only supports re.DOTALL and re.MULTILINE. - """ # noqa W605 + """ if not _is_supported_regex_flags(flags): raise NotImplementedError( "unsupported value for `flags` parameter" @@ -830,7 +830,7 @@ def contains( value is set. The `flags` parameter currently only supports re.DOTALL and re.MULTILINE. - """ # noqa W605 + """ if na is not np.nan: raise NotImplementedError("`na` parameter is not yet supported") if regex and isinstance(pat, re.Pattern): @@ -3675,7 +3675,7 @@ def count(self, pat: str, flags: int = 0) -> SeriesOrIndex: - Some characters need to be escaped when passing in pat. e.g. ``'$'`` has a special meaning in regex and must be escaped when finding this literal character. - """ # noqa W605 + """ if isinstance(pat, re.Pattern): flags = pat.flags & ~re.U pat = pat.pattern diff --git a/python/cudf/cudf/core/column/timedelta.py b/python/cudf/cudf/core/column/timedelta.py index 620fe31c30f..efa4892333d 100644 --- a/python/cudf/cudf/core/column/timedelta.py +++ b/python/cudf/cudf/core/column/timedelta.py @@ -468,7 +468,7 @@ def components(self) -> dict[str, ColumnBase]: 2 13000 10 12 48 712 0 0 3 0 0 35 35 656 0 0 4 37 13 12 14 234 0 0 - """ # noqa: E501 + """ date_meta = { "seconds": ["m", "s"], diff --git a/python/cudf/cudf/core/column_accessor.py b/python/cudf/cudf/core/column_accessor.py index 496e86ed709..e4fd82e819b 100644 --- a/python/cudf/cudf/core/column_accessor.py +++ b/python/cudf/cudf/core/column_accessor.py @@ -49,7 +49,7 @@ def from_zip(cls, data: abc.Iterator): def __getitem__(self, key): """Recursively apply dict.__getitem__ for nested elements.""" # As described in the pandas docs - # https://pandas.pydata.org/pandas-docs/stable/user_guide/advanced.html#advanced-indexing-with-hierarchical-index # noqa: E501 + # https://pandas.pydata.org/pandas-docs/stable/user_guide/advanced.html#advanced-indexing-with-hierarchical-index # accessing nested elements of a multiindex must be done using a tuple. # Lists and other sequences are treated as accessing multiple elements # at the top level of the index. @@ -62,10 +62,10 @@ def _to_flat_dict_inner(d: dict, parents: tuple = ()): for k, v in d.items(): if not isinstance(v, d.__class__): if parents: - k = parents + (k,) + k = (*parents, k) yield (k, v) else: - yield from _to_flat_dict_inner(d=v, parents=parents + (k,)) + yield from _to_flat_dict_inner(d=v, parents=(*parents, k)) class ColumnAccessor(abc.MutableMapping): diff --git a/python/cudf/cudf/core/cut.py b/python/cudf/cudf/core/cut.py index a4d12cfc7f0..5bfea45a946 100644 --- a/python/cudf/cudf/core/cut.py +++ b/python/cudf/cudf/core/cut.py @@ -149,7 +149,7 @@ def cut( if len(set(bins)) is not len(bins): if duplicates == "raise": raise ValueError( - f"Bin edges must be unique: {repr(bins)}.\n" + f"Bin edges must be unique: {bins!r}.\n" f"You can drop duplicate edges by setting the 'duplicates'" "kwarg" ) diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index a2e213c9c94..b77d757f99d 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -2248,7 +2248,7 @@ def from_dict( n1 n2 a b 1 3 c 2 4 - """ # noqa: E501 + """ orient = orient.lower() if orient == "index": @@ -2405,7 +2405,7 @@ def to_dict( >>> df.to_dict('records', into=dd) [defaultdict(, {'col1': 1, 'col2': 0.5}), defaultdict(, {'col1': 2, 'col2': 0.75})] - """ # noqa: E501 + """ orient = orient.lower() if orient == "series": @@ -3033,7 +3033,7 @@ def set_index( if len(keys) == 0: raise ValueError("No valid columns to be added to index.") if append: - keys = [self.index] + keys + keys = [self.index, *keys] # Preliminary type check labels_not_found = [] @@ -3099,7 +3099,7 @@ def set_index( @_performance_tracking def fillna( self, value=None, method=None, axis=None, inplace=False, limit=None - ): # noqa: D102 + ): if isinstance(value, (pd.Series, pd.DataFrame)): value = cudf.from_pandas(value) if isinstance(value, cudf.Series): @@ -3580,7 +3580,7 @@ def drop_duplicates( 1 Yum Yum cup 4.0 2 Indomie cup 3.5 4 Indomie pack 5.0 - """ # noqa: E501 + """ outdf = super().drop_duplicates( subset=subset, keep=keep, @@ -4860,7 +4860,7 @@ def map( if na_action not in {"ignore", None}: raise ValueError( - f"na_action must be 'ignore' or None. Got {repr(na_action)}" + f"na_action must be 'ignore' or None. Got {na_action!r}" ) if na_action == "ignore": @@ -6145,7 +6145,7 @@ def quantile( non-numeric types and result is expected to be a Series in case of Pandas. cuDF will return a DataFrame as it doesn't support mixed types under Series. - """ # noqa: E501 + """ if axis not in (0, None): raise NotImplementedError("axis is not implemented yet") @@ -6844,7 +6844,7 @@ def select_dtypes(self, include=None, exclude=None): 3 False 2.0 4 True 1.0 5 False 2.0 - """ # noqa: E501 + """ # code modified from: # https://github.com/pandas-dev/pandas/blob/master/pandas/core/frame.py#L3196 diff --git a/python/cudf/cudf/core/dtypes.py b/python/cudf/cudf/core/dtypes.py index e8928e50dd8..801020664da 100644 --- a/python/cudf/cudf/core/dtypes.py +++ b/python/cudf/cudf/core/dtypes.py @@ -237,7 +237,7 @@ def from_pandas(cls, dtype: pd.CategoricalDtype) -> "CategoricalDtype": >>> cudf_dtype = cudf.CategoricalDtype.from_pandas(pd_dtype) >>> cudf_dtype CategoricalDtype(categories=['b', 'a'], ordered=True, categories_dtype=object) - """ # noqa: E501 + """ return CategoricalDtype( categories=dtype.categories, ordered=dtype.ordered ) @@ -254,7 +254,7 @@ def to_pandas(self) -> pd.CategoricalDtype: CategoricalDtype(categories=['b', 'a'], ordered=True, categories_dtype=object) >>> dtype.to_pandas() CategoricalDtype(categories=['b', 'a'], ordered=True, categories_dtype=object) - """ # noqa: E501 + """ if self._categories is None: categories = None elif self._categories.dtype.kind == "f": @@ -399,7 +399,7 @@ def element_type(self) -> Dtype: ListDtype(float32) >>> deep_nested_type.element_type.element_type.element_type 'float32' - """ # noqa: E501 + """ if isinstance(self._typ.value_type, pa.ListType): return ListDtype.from_arrow(self._typ.value_type) elif isinstance(self._typ.value_type, pa.StructType): @@ -420,7 +420,7 @@ def leaf_type(self): ListDtype(ListDtype(ListDtype(float32))) >>> deep_nested_type.leaf_type 'float32' - """ # noqa: E501 + """ if isinstance(self.element_type, ListDtype): return self.element_type.leaf_type else: @@ -486,7 +486,7 @@ def __eq__(self, other): def __repr__(self): if isinstance(self.element_type, (ListDtype, StructDtype)): - return f"{type(self).__name__}({repr(self.element_type)})" + return f"{type(self).__name__}({self.element_type!r})" else: return f"{type(self).__name__}({self.element_type})" @@ -556,7 +556,7 @@ class StructDtype(_BaseDtype): >>> nested_struct_dtype = cudf.StructDtype({"dict_data": struct_dtype, "c": "uint8"}) >>> nested_struct_dtype StructDtype({'dict_data': StructDtype({'a': dtype('int64'), 'b': dtype('O')}), 'c': dtype('uint8')}) - """ # noqa: E501 + """ name = "struct" @@ -730,7 +730,7 @@ def itemsize(self): >>> decimal{size}_dtype = cudf.Decimal{size}Dtype(precision=9, scale=2) >>> decimal{size}_dtype Decimal{size}Dtype(precision=9, scale=2) - """ # noqa: E501 + """ ) @@ -743,7 +743,7 @@ def __init__(self, precision, scale=0): @property def str(self): - return f"{str(self.name)}({self.precision}, {self.scale})" + return f"{self.name!s}({self.precision}, {self.scale})" @property def precision(self): diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py index 0c0f271fe6f..70789160cb6 100644 --- a/python/cudf/cudf/core/frame.py +++ b/python/cudf/cudf/core/frame.py @@ -1392,7 +1392,7 @@ def argsort( >>> idx = cudf.Index([3, 1, 2]) >>> idx.argsort() array([1, 2, 0], dtype=int32) - """ # noqa: E501 + """ if na_position not in {"first", "last"}: raise ValueError(f"invalid na_position: {na_position}") if kind != "quicksort": diff --git a/python/cudf/cudf/core/groupby/groupby.py b/python/cudf/cudf/core/groupby/groupby.py index e977f037b79..29ab3b60d9d 100644 --- a/python/cudf/cudf/core/groupby/groupby.py +++ b/python/cudf/cudf/core/groupby/groupby.py @@ -1467,9 +1467,7 @@ def _iterative_groupby_apply( RuntimeWarning, ) - chunks = [ - grouped_values[s:e] for s, e in zip(offsets[:-1], offsets[1:]) - ] + chunks = [grouped_values[s:e] for s, e in itertools.pairwise(offsets)] chunk_results = [function(chk, *args) for chk in chunks] return self._post_process_chunk_results( chunk_results, group_names, group_keys, grouped_values diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py index 80e037c36fd..887bec19ad8 100644 --- a/python/cudf/cudf/core/index.py +++ b/python/cudf/cudf/core/index.py @@ -1619,7 +1619,7 @@ def argsort( Returns ------- cupy.ndarray: The indices sorted based on input. - """ # noqa: E501 + """ return super().argsort( axis=axis, kind=kind, @@ -2218,7 +2218,7 @@ def year(self) -> Index: DatetimeIndex(['2000-12-31', '2001-12-31', '2002-12-31'], dtype='datetime64[ns]') >>> datetime_index.year Index([2000, 2001, 2002], dtype='int16') - """ # noqa: E501 + """ return self._get_dt_field("year") @property # type: ignore @@ -2237,7 +2237,7 @@ def month(self) -> Index: DatetimeIndex(['2000-01-31', '2000-02-29', '2000-03-31'], dtype='datetime64[ns]') >>> datetime_index.month Index([1, 2, 3], dtype='int16') - """ # noqa: E501 + """ return self._get_dt_field("month") @property # type: ignore @@ -2256,7 +2256,7 @@ def day(self) -> Index: DatetimeIndex(['2000-01-01', '2000-01-02', '2000-01-03'], dtype='datetime64[ns]') >>> datetime_index.day Index([1, 2, 3], dtype='int16') - """ # noqa: E501 + """ return self._get_dt_field("day") @property # type: ignore @@ -2340,7 +2340,7 @@ def microsecond(self) -> Index: dtype='datetime64[ns]') >>> datetime_index.microsecond Index([0, 1, 2], dtype='int32') - """ # noqa: E501 + """ return Index._from_column( ( # Need to manually promote column to int32 because @@ -2628,7 +2628,7 @@ def ceil(self, freq: str) -> Self: ... ]) >>> gIndex.ceil("T") DatetimeIndex(['2020-05-31 08:06:00', '1999-12-31 18:41:00'], dtype='datetime64[ns]') - """ # noqa: E501 + """ return type(self)._from_column(self._column.ceil(freq), name=self.name) @_performance_tracking @@ -2659,7 +2659,7 @@ def floor(self, freq: str) -> Self: ... ]) >>> gIndex.floor("T") DatetimeIndex(['2020-05-31 08:59:00', '1999-12-31 18:44:00'], dtype='datetime64[ns]') - """ # noqa: E501 + """ return type(self)._from_column( self._column.floor(freq), name=self.name ) @@ -2699,7 +2699,7 @@ def round(self, freq: str) -> Self: DatetimeIndex(['2001-01-01', '2001-01-01', '2001-01-01'], dtype='datetime64[ns]') >>> dt_idx.round('T') DatetimeIndex(['2001-01-01 00:05:00', '2001-01-01 00:05:00', '2001-01-01 00:05:00'], dtype='datetime64[ns]') - """ # noqa: E501 + """ return type(self)._from_column( self._column.round(freq), name=self.name ) @@ -2750,7 +2750,7 @@ def tz_localize( ``ambiguous`` and ``nonexistent`` arguments. Any ambiguous or nonexistent timestamps are converted to 'NaT'. - """ # noqa: E501 + """ result_col = self._column.tz_localize(tz, ambiguous, nonexistent) return DatetimeIndex._from_column( result_col, name=self.name, freq=self._freq @@ -2787,7 +2787,7 @@ def tz_convert(self, tz: str | None) -> Self: '2018-03-02 14:00:00+00:00', '2018-03-03 14:00:00+00:00'], dtype='datetime64[ns, Europe/London]') - """ # noqa: E501 + """ result_col = self._column.tz_convert(tz) return DatetimeIndex._from_column(result_col, name=self.name) @@ -3131,7 +3131,7 @@ class CategoricalIndex(Index): >>> cudf.CategoricalIndex( ... data=[1, 2, 3, 4], dtype=pd.CategoricalDtype([1, 2, 3]), name="a") CategoricalIndex([1, 2, 3, ], categories=[1, 2, 3], ordered=False, dtype='category', name='a') - """ # noqa: E501 + """ @_performance_tracking def __init__( diff --git a/python/cudf/cudf/core/indexed_frame.py b/python/cudf/cudf/core/indexed_frame.py index 2f8c2587937..21ac009e7ff 100644 --- a/python/cudf/cudf/core/indexed_frame.py +++ b/python/cudf/cudf/core/indexed_frame.py @@ -607,7 +607,7 @@ def copy(self, deep: bool = True) -> Self: ) @_performance_tracking - def equals(self, other) -> bool: # noqa: D102 + def equals(self, other) -> bool: return super().equals(other) and self.index.equals(other.index) @property @@ -5474,7 +5474,7 @@ def groupby( ), ) ) - def add(self, other, axis, level=None, fill_value=None): # noqa: D102 + def add(self, other, axis, level=None, fill_value=None): if level is not None: raise NotImplementedError("level parameter is not supported yet.") @@ -5515,7 +5515,7 @@ def add(self, other, axis, level=None, fill_value=None): # noqa: D102 ), ) ) - def radd(self, other, axis, level=None, fill_value=None): # noqa: D102 + def radd(self, other, axis, level=None, fill_value=None): if level is not None: raise NotImplementedError("level parameter is not supported yet.") @@ -5556,7 +5556,7 @@ def radd(self, other, axis, level=None, fill_value=None): # noqa: D102 ), ) ) - def subtract(self, other, axis, level=None, fill_value=None): # noqa: D102 + def subtract(self, other, axis, level=None, fill_value=None): if level is not None: raise NotImplementedError("level parameter is not supported yet.") @@ -5599,7 +5599,7 @@ def subtract(self, other, axis, level=None, fill_value=None): # noqa: D102 ), ) ) - def rsub(self, other, axis, level=None, fill_value=None): # noqa: D102 + def rsub(self, other, axis, level=None, fill_value=None): if level is not None: raise NotImplementedError("level parameter is not supported yet.") @@ -5640,7 +5640,7 @@ def rsub(self, other, axis, level=None, fill_value=None): # noqa: D102 ), ) ) - def multiply(self, other, axis, level=None, fill_value=None): # noqa: D102 + def multiply(self, other, axis, level=None, fill_value=None): if level is not None: raise NotImplementedError("level parameter is not supported yet.") @@ -5683,7 +5683,7 @@ def multiply(self, other, axis, level=None, fill_value=None): # noqa: D102 ), ) ) - def rmul(self, other, axis, level=None, fill_value=None): # noqa: D102 + def rmul(self, other, axis, level=None, fill_value=None): if level is not None: raise NotImplementedError("level parameter is not supported yet.") @@ -5724,7 +5724,7 @@ def rmul(self, other, axis, level=None, fill_value=None): # noqa: D102 ), ) ) - def mod(self, other, axis, level=None, fill_value=None): # noqa: D102 + def mod(self, other, axis, level=None, fill_value=None): if level is not None: raise NotImplementedError("level parameter is not supported yet.") @@ -5765,7 +5765,7 @@ def mod(self, other, axis, level=None, fill_value=None): # noqa: D102 ), ) ) - def rmod(self, other, axis, level=None, fill_value=None): # noqa: D102 + def rmod(self, other, axis, level=None, fill_value=None): if level is not None: raise NotImplementedError("level parameter is not supported yet.") @@ -5806,7 +5806,7 @@ def rmod(self, other, axis, level=None, fill_value=None): # noqa: D102 ), ) ) - def pow(self, other, axis, level=None, fill_value=None): # noqa: D102 + def pow(self, other, axis, level=None, fill_value=None): if level is not None: raise NotImplementedError("level parameter is not supported yet.") @@ -5847,7 +5847,7 @@ def pow(self, other, axis, level=None, fill_value=None): # noqa: D102 ), ) ) - def rpow(self, other, axis, level=None, fill_value=None): # noqa: D102 + def rpow(self, other, axis, level=None, fill_value=None): if level is not None: raise NotImplementedError("level parameter is not supported yet.") @@ -5888,7 +5888,7 @@ def rpow(self, other, axis, level=None, fill_value=None): # noqa: D102 ), ) ) - def floordiv(self, other, axis, level=None, fill_value=None): # noqa: D102 + def floordiv(self, other, axis, level=None, fill_value=None): if level is not None: raise NotImplementedError("level parameter is not supported yet.") @@ -5929,7 +5929,7 @@ def floordiv(self, other, axis, level=None, fill_value=None): # noqa: D102 ), ) ) - def rfloordiv(self, other, axis, level=None, fill_value=None): # noqa: D102 + def rfloordiv(self, other, axis, level=None, fill_value=None): if level is not None: raise NotImplementedError("level parameter is not supported yet.") @@ -5970,7 +5970,7 @@ def rfloordiv(self, other, axis, level=None, fill_value=None): # noqa: D102 ), ) ) - def truediv(self, other, axis, level=None, fill_value=None): # noqa: D102 + def truediv(self, other, axis, level=None, fill_value=None): if level is not None: raise NotImplementedError("level parameter is not supported yet.") @@ -6015,7 +6015,7 @@ def truediv(self, other, axis, level=None, fill_value=None): # noqa: D102 ), ) ) - def rtruediv(self, other, axis, level=None, fill_value=None): # noqa: D102 + def rtruediv(self, other, axis, level=None, fill_value=None): if level is not None: raise NotImplementedError("level parameter is not supported yet.") @@ -6059,7 +6059,7 @@ def rtruediv(self, other, axis, level=None, fill_value=None): # noqa: D102 ), ) ) - def eq(self, other, axis="columns", level=None, fill_value=None): # noqa: D102 + def eq(self, other, axis="columns", level=None, fill_value=None): return self._binaryop( other=other, op="__eq__", fill_value=fill_value, can_reindex=True ) @@ -6099,7 +6099,7 @@ def eq(self, other, axis="columns", level=None, fill_value=None): # noqa: D102 ), ) ) - def ne(self, other, axis="columns", level=None, fill_value=None): # noqa: D102 + def ne(self, other, axis="columns", level=None, fill_value=None): return self._binaryop( other=other, op="__ne__", fill_value=fill_value, can_reindex=True ) @@ -6139,7 +6139,7 @@ def ne(self, other, axis="columns", level=None, fill_value=None): # noqa: D102 ), ) ) - def lt(self, other, axis="columns", level=None, fill_value=None): # noqa: D102 + def lt(self, other, axis="columns", level=None, fill_value=None): return self._binaryop( other=other, op="__lt__", fill_value=fill_value, can_reindex=True ) @@ -6179,7 +6179,7 @@ def lt(self, other, axis="columns", level=None, fill_value=None): # noqa: D102 ), ) ) - def le(self, other, axis="columns", level=None, fill_value=None): # noqa: D102 + def le(self, other, axis="columns", level=None, fill_value=None): return self._binaryop( other=other, op="__le__", fill_value=fill_value, can_reindex=True ) @@ -6219,7 +6219,7 @@ def le(self, other, axis="columns", level=None, fill_value=None): # noqa: D102 ), ) ) - def gt(self, other, axis="columns", level=None, fill_value=None): # noqa: D102 + def gt(self, other, axis="columns", level=None, fill_value=None): return self._binaryop( other=other, op="__gt__", fill_value=fill_value, can_reindex=True ) @@ -6259,7 +6259,7 @@ def gt(self, other, axis="columns", level=None, fill_value=None): # noqa: D102 ), ) ) - def ge(self, other, axis="columns", level=None, fill_value=None): # noqa: D102 + def ge(self, other, axis="columns", level=None, fill_value=None): return self._binaryop( other=other, op="__ge__", fill_value=fill_value, can_reindex=True ) diff --git a/python/cudf/cudf/core/mixins/scans.py b/python/cudf/cudf/core/mixins/scans.py index b0f606e32e6..289fcb84d91 100644 --- a/python/cudf/cudf/core/mixins/scans.py +++ b/python/cudf/cudf/core/mixins/scans.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. from .mixin_factory import _create_delegating_mixin @@ -12,5 +12,5 @@ "cumprod", "cummin", "cummax", - }, # noqa: E231 + }, ) diff --git a/python/cudf/cudf/core/multiindex.py b/python/cudf/cudf/core/multiindex.py index 19a53af018d..173d4e1c584 100644 --- a/python/cudf/cudf/core/multiindex.py +++ b/python/cudf/cudf/core/multiindex.py @@ -567,7 +567,7 @@ def levels(self) -> list[cudf.Index]: names=['a', 'b']) >>> midx.levels [Index([1, 2, 3], dtype='int64', name='a'), Index([10, 11, 12], dtype='int64', name='b')] - """ # noqa: E501 + """ return [ idx.rename(name) for idx, name in zip(self._levels, self.names) ] diff --git a/python/cudf/cudf/core/reshape.py b/python/cudf/cudf/core/reshape.py index 016bd1225cd..f37b44b1100 100644 --- a/python/cudf/cudf/core/reshape.py +++ b/python/cudf/cudf/core/reshape.py @@ -1013,7 +1013,7 @@ def as_tuple(x): ca = ColumnAccessor( result, multiindex=True, - level_names=(None,) + columns._column_names, + level_names=(None, *columns._column_names), verify=False, ) return cudf.DataFrame._from_data(ca, index=index_labels) diff --git a/python/cudf/cudf/core/scalar.py b/python/cudf/cudf/core/scalar.py index f6331aa1f49..80dd0921f9c 100644 --- a/python/cudf/cudf/core/scalar.py +++ b/python/cudf/cudf/core/scalar.py @@ -304,7 +304,7 @@ def __repr__(self): # https://github.com/numpy/numpy/issues/17552 return ( f"{self.__class__.__name__}" - f"({str(self.value)}, dtype={self.dtype})" + f"({self.value!s}, dtype={self.dtype})" ) def _binop_result_dtype_or_error(self, other, op): diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py index 9b60424c924..822be69164e 100644 --- a/python/cudf/cudf/core/series.py +++ b/python/cudf/cudf/core/series.py @@ -517,7 +517,7 @@ def from_categorical(cls, categorical, codes=None): 3 a dtype: category Categories (3, object): ['a', 'b', 'c'] - """ # noqa: E501 + """ col = as_column(categorical) if codes is not None: codes = as_column(codes) @@ -942,7 +942,7 @@ def drop( labels, axis, index, columns, level, inplace, errors ) - def tolist(self): # noqa: D102 + def tolist(self): raise TypeError( "cuDF does not support conversion to host memory " "via the `tolist()` method. Consider using " @@ -1087,7 +1087,7 @@ def reindex( DataFrame, followed by the original Series values. When `drop` is True, a `Series` is returned. In either case, if ``inplace=True``, no value is returned. -""", # noqa: E501 +""", example=""" >>> series = cudf.Series(['a', 'b', 'c', 'd'], index=[10, 11, 12, 13]) >>> series @@ -1196,7 +1196,7 @@ def to_frame(self, name: abc.Hashable = no_default) -> cudf.DataFrame: 12 c 13 15 d - """ # noqa: E501 + """ return self._to_frame(name=name, index=self.index) @_performance_tracking @@ -2113,7 +2113,7 @@ def data(self): >>> np.array(series.data.memoryview()) array([1, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0], dtype=uint8) - """ # noqa: E501 + """ return self._column.data @property # type: ignore @@ -4581,7 +4581,7 @@ def is_month_end(self) -> Series: 7 False 8 False dtype: bool - """ # noqa: E501 + """ return self._return_result_like_self(self.series._column.is_month_end) @property # type: ignore @@ -5166,7 +5166,7 @@ def components(self) -> cudf.DataFrame: 2 13000 10 12 48 712 0 0 3 0 0 35 35 656 0 0 4 37 13 12 14 234 0 0 - """ # noqa: E501 + """ ca = ColumnAccessor(self.series._column.components(), verify=False) return self.series._constructor_expanddim._from_data( ca, index=self.series.index diff --git a/python/cudf/cudf/core/single_column_frame.py b/python/cudf/cudf/core/single_column_frame.py index 0e66f383ca0..f6d0664758f 100644 --- a/python/cudf/cudf/core/single_column_frame.py +++ b/python/cudf/cudf/core/single_column_frame.py @@ -83,7 +83,7 @@ def name(self, value): @property # type: ignore @_performance_tracking - def ndim(self) -> int: # noqa: D401 + def ndim(self) -> int: """Number of dimensions of the underlying data, by definition 1.""" return 1 @@ -105,12 +105,12 @@ def _column(self) -> ColumnBase: @property # type: ignore @_performance_tracking - def values(self) -> cupy.ndarray: # noqa: D102 + def values(self) -> cupy.ndarray: return self._column.values @property # type: ignore @_performance_tracking - def values_host(self) -> numpy.ndarray: # noqa: D102 + def values_host(self) -> numpy.ndarray: return self._column.values_host @classmethod diff --git a/python/cudf/cudf/core/udf/masked_typing.py b/python/cudf/cudf/core/udf/masked_typing.py index 4c90c5bbba0..3a1e01caf28 100644 --- a/python/cudf/cudf/core/udf/masked_typing.py +++ b/python/cudf/cudf/core/udf/masked_typing.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2023, NVIDIA CORPORATION. +# Copyright (c) 2020-2024, NVIDIA CORPORATION. import operator @@ -50,7 +50,7 @@ SUPPORTED_NUMPY_TYPES = ( NUMERIC_TYPES | DATETIME_TYPES | TIMEDELTA_TYPES | STRING_TYPES ) -supported_type_str = "\n".join(sorted(list(SUPPORTED_NUMPY_TYPES) + ["bool"])) +supported_type_str = "\n".join(sorted([*list(SUPPORTED_NUMPY_TYPES), "bool"])) _units = ["ns", "ms", "us", "s"] _datetime_cases = {types.NPDatetime(u) for u in _units} diff --git a/python/cudf/cudf/datasets.py b/python/cudf/cudf/datasets.py index dbabaacf6b5..e8d634598f4 100644 --- a/python/cudf/cudf/datasets.py +++ b/python/cudf/cudf/datasets.py @@ -6,7 +6,7 @@ import cudf from cudf._lib.transform import bools_to_mask -__all__ = ["timeseries", "randomdata"] +__all__ = ["randomdata", "timeseries"] # TODO: diff --git a/python/cudf/cudf/io/parquet.py b/python/cudf/cudf/io/parquet.py index 750c6cec180..2382e9f12ed 100644 --- a/python/cudf/cudf/io/parquet.py +++ b/python/cudf/cudf/io/parquet.py @@ -1062,10 +1062,7 @@ def to_parquet( ) partition_info = ( - [ - (i, j - i) - for i, j in zip(partition_offsets, partition_offsets[1:]) - ] + [(i, j - i) for i, j in itertools.pairwise(partition_offsets)] if partition_offsets is not None else None ) @@ -1485,7 +1482,7 @@ def write_table(self, df): ) existing_cw_batch = defaultdict(dict) new_cw_paths = [] - partition_info = [(i, j - i) for i, j in zip(offsets, offsets[1:])] + partition_info = [(i, j - i) for i, j in itertools.pairwise(offsets)] for path, part_info, meta_path in zip( paths, diff --git a/python/cudf/cudf/options.py b/python/cudf/cudf/options.py index e206c8bca08..79a3a794af3 100644 --- a/python/cudf/cudf/options.py +++ b/python/cudf/cudf/options.py @@ -380,7 +380,7 @@ class option_context(ContextDecorator): >>> from cudf import option_context >>> with option_context('mode.pandas_compatible', True, 'default_float_bitwidth', 32): ... pass - """ # noqa: E501 + """ def __init__(self, *args) -> None: if len(args) % 2 != 0: diff --git a/python/cudf/cudf/pandas/__init__.py b/python/cudf/cudf/pandas/__init__.py index bacf1f7e77b..fec181e85d7 100644 --- a/python/cudf/cudf/pandas/__init__.py +++ b/python/cudf/cudf/pandas/__init__.py @@ -12,7 +12,7 @@ from .magics import load_ipython_extension from .profiler import Profiler -__all__ = ["Profiler", "load_ipython_extension", "install", "is_proxy_object"] +__all__ = ["Profiler", "install", "is_proxy_object", "load_ipython_extension"] LOADED = False @@ -57,7 +57,7 @@ def install(): current_mr = rmm.mr.get_current_device_resource() if not isinstance(current_mr, rmm.mr.CudaMemoryResource): warnings.warn( - f"cudf.pandas detected an already configured memory resource, ignoring 'CUDF_PANDAS_RMM_MODE'={str(rmm_mode)}", + f"cudf.pandas detected an already configured memory resource, ignoring 'CUDF_PANDAS_RMM_MODE'={rmm_mode!s}", UserWarning, ) return diff --git a/python/cudf/cudf/pandas/__main__.py b/python/cudf/cudf/pandas/__main__.py index e0d3d9101a9..619ee822a54 100644 --- a/python/cudf/cudf/pandas/__main__.py +++ b/python/cudf/cudf/pandas/__main__.py @@ -96,7 +96,7 @@ def main(): (module,) = args.module # run the module passing the remaining arguments # as if it were run with python -m - sys.argv[:] = [module] + args.args # not thread safe? + sys.argv[:] = [module, *args.args] # not thread safe? runpy.run_module(module, run_name="__main__") elif len(args.args) >= 1: # Remove ourself from argv and continue diff --git a/python/cudf/cudf/pandas/_wrappers/pandas.py b/python/cudf/cudf/pandas/_wrappers/pandas.py index 05e7d159c63..e763875adb8 100644 --- a/python/cudf/cudf/pandas/_wrappers/pandas.py +++ b/python/cudf/cudf/pandas/_wrappers/pandas.py @@ -77,8 +77,8 @@ def _pandas_util_dir(): # https://github.com/pandas-dev/pandas/blob/2.2.x/pandas/util/__init__.py res = list( set( - list(importlib.import_module("pandas.util").__dict__.keys()) - + [ + [ + *list(importlib.import_module("pandas.util").__dict__.keys()), "Appender", "Substitution", "_exceptions", @@ -219,7 +219,7 @@ def Timestamp_Timedelta__new__(cls, *args, **kwargs): def _DataFrame__dir__(self): # Column names that are string identifiers are added to the dir of the # DataFrame - # See https://github.com/pandas-dev/pandas/blob/43691a2f5d235b08f0f3aa813d8fdcb7c4ce1e47/pandas/core/indexes/base.py#L878 # noqa: E501 + # See https://github.com/pandas-dev/pandas/blob/43691a2f5d235b08f0f3aa813d8fdcb7c4ce1e47/pandas/core/indexes/base.py#L878 _pd_df_dir = dir(pd.DataFrame) return _pd_df_dir + [ colname diff --git a/python/cudf/cudf/pandas/fast_slow_proxy.py b/python/cudf/cudf/pandas/fast_slow_proxy.py index 40893ee2614..d32d388b975 100644 --- a/python/cudf/cudf/pandas/fast_slow_proxy.py +++ b/python/cudf/cudf/pandas/fast_slow_proxy.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2023-2024, NVIDIA CORPORATION & AFFILIATES. # noqa: E501 +# SPDX-FileCopyrightText: Copyright (c) 2023-2024, NVIDIA CORPORATION & AFFILIATES. # All rights reserved. # SPDX-License-Identifier: Apache-2.0 @@ -247,7 +247,7 @@ def _fsproxy_state(self) -> _State: if metaclasses: metaclass = types.new_class( # type: ignore f"{name}_Meta", - metaclasses + (_FastSlowProxyMeta,), + (*metaclasses, _FastSlowProxyMeta), {}, ) cls = types.new_class( @@ -1301,7 +1301,7 @@ def _replace_closurevars( return functools.update_wrapper( g, f, - assigned=functools.WRAPPER_ASSIGNMENTS + ("__kwdefaults__",), + assigned=(*functools.WRAPPER_ASSIGNMENTS, "__kwdefaults__"), ) diff --git a/python/cudf/cudf/pandas/scripts/analyze-test-failures.py b/python/cudf/cudf/pandas/scripts/analyze-test-failures.py index bb2fc00d9fc..e4ee0ce1ca4 100644 --- a/python/cudf/cudf/pandas/scripts/analyze-test-failures.py +++ b/python/cudf/cudf/pandas/scripts/analyze-test-failures.py @@ -41,7 +41,7 @@ def count_failures(log_file_name, pattern): PANDAS_TEST_PREFIX ) if fnmatch(line_module_name, pattern): - if "longrepr" in line and line["longrepr"]: + if line.get("longrepr"): if isinstance(line["longrepr"], (tuple, list)): message = line["longrepr"][2].splitlines()[0] elif isinstance(line["longrepr"], str): diff --git a/python/cudf/cudf/testing/dataset_generator.py b/python/cudf/cudf/testing/dataset_generator.py index 99b686406fb..01a75a2efb0 100644 --- a/python/cudf/cudf/testing/dataset_generator.py +++ b/python/cudf/cudf/testing/dataset_generator.py @@ -237,9 +237,9 @@ def generate( def get_dataframe(parameters, use_threads): # Initialize seeds if parameters.seed is not None: - rng = np.random.default_rng(seed=parameters.seed) # noqa: F841 + rng = np.random.default_rng(seed=parameters.seed) else: - rng = np.random.default_rng(seed=0) # noqa: F841 + rng = np.random.default_rng(seed=0) # For each column, invoke the data generator for column_params in parameters.column_parameters: diff --git a/python/cudf/cudf/tests/series/test_datetimelike.py b/python/cudf/cudf/tests/series/test_datetimelike.py index 691da224f44..81ba61b31dc 100644 --- a/python/cudf/cudf/tests/series/test_datetimelike.py +++ b/python/cudf/cudf/tests/series/test_datetimelike.py @@ -57,7 +57,7 @@ def test_localize_ambiguous(request, unit, zone_name): request.applymarker( pytest.mark.xfail( condition=(zone_name == "America/Metlakatla"), - reason="https://www.timeanddate.com/news/time/metlakatla-quits-dst.html", # noqa: E501 + reason="https://www.timeanddate.com/news/time/metlakatla-quits-dst.html", ) ) s = cudf.Series( @@ -83,7 +83,7 @@ def test_localize_nonexistent(request, unit, zone_name): request.applymarker( pytest.mark.xfail( condition=(zone_name == "America/Grand_Turk"), - reason="https://www.worldtimezone.com/dst_news/dst_news_turkscaicos03.html", # noqa: E501 + reason="https://www.worldtimezone.com/dst_news/dst_news_turkscaicos03.html", ) ) s = cudf.Series( diff --git a/python/cudf/cudf/tests/test_binops.py b/python/cudf/cudf/tests/test_binops.py index 71b6bbd688d..0712a0de635 100644 --- a/python/cudf/cudf/tests/test_binops.py +++ b/python/cudf/cudf/tests/test_binops.py @@ -167,11 +167,11 @@ _operators_arithmetic = _operators_arithmetic[:1] _operators_comparison = _operators_comparison[:1] _cudf_scalar_reflected_ops = _cudf_scalar_reflected_ops[:1] - DATETIME_TYPES = {"datetime64[ms]"} # noqa: F811 - NUMERIC_TYPES = {"float32"} # noqa: F811 - FLOAT_TYPES = {"float64"} # noqa: F811 - INTEGER_TYPES = {"int16"} # noqa: F811 - TIMEDELTA_TYPES = {"timedelta64[s]"} # noqa: F811 + DATETIME_TYPES = {"datetime64[ms]"} + NUMERIC_TYPES = {"float32"} + FLOAT_TYPES = {"float64"} + INTEGER_TYPES = {"int16"} + TIMEDELTA_TYPES = {"timedelta64[s]"} # To save time, we skip tests marked "pytest.mark.xfail" pytest_xfail = pytest.mark.skipif @@ -444,7 +444,7 @@ def test_str_series_compare_num_reflected( @pytest.mark.parametrize("obj_class", ["Series", "Index"]) @pytest.mark.parametrize("nelem", [1, 2, 100]) @pytest.mark.parametrize("cmpop", _cmpops) -@pytest.mark.parametrize("dtype", utils.NUMERIC_TYPES + ["datetime64[ms]"]) +@pytest.mark.parametrize("dtype", [*utils.NUMERIC_TYPES, "datetime64[ms]"]) @pytest.mark.parametrize("use_cudf_scalar", [True, False]) def test_series_compare_scalar( nelem, cmpop, obj_class, dtype, use_cudf_scalar diff --git a/python/cudf/cudf/tests/test_categorical.py b/python/cudf/cudf/tests/test_categorical.py index ba8693888b9..db24fdd2a29 100644 --- a/python/cudf/cudf/tests/test_categorical.py +++ b/python/cudf/cudf/tests/test_categorical.py @@ -768,7 +768,7 @@ def test_categorical_setitem_with_nan(): assert_eq(gs, expected_series) -@pytest.mark.parametrize("dtype", list(NUMERIC_TYPES) + ["object"]) +@pytest.mark.parametrize("dtype", [*list(NUMERIC_TYPES), "object"]) @pytest.mark.parametrize("input_obj", [[1, cudf.NA, 3]]) def test_series_construction_with_nulls(input_obj, dtype): dtype = cudf.dtype(dtype) diff --git a/python/cudf/cudf/tests/test_concat.py b/python/cudf/cudf/tests/test_concat.py index ab0f1767cd6..f57f256d55c 100644 --- a/python/cudf/cudf/tests/test_concat.py +++ b/python/cudf/cudf/tests/test_concat.py @@ -625,7 +625,7 @@ def test_concat_series_dataframe_input_str(objs): ) @pytest.mark.parametrize("ignore_index", [True, False]) def test_concat_empty_dataframes(df, other, ignore_index): - other_pd = [df] + other + other_pd = [df, *other] gdf = cudf.from_pandas(df) other_gd = [gdf] + [cudf.from_pandas(o) for o in other] @@ -1224,7 +1224,7 @@ def test_concat_join_empty_dataframes( request, df, other, ignore_index, join, sort ): axis = 0 - other_pd = [df] + other + other_pd = [df, *other] gdf = cudf.from_pandas(df) other_gd = [gdf] + [cudf.from_pandas(o) for o in other] @@ -1312,7 +1312,7 @@ def test_concat_join_empty_dataframes_axis_1( df, other, ignore_index, axis, join, sort ): # no duplicate columns - other_pd = [df] + other + other_pd = [df, *other] gdf = cudf.from_pandas(df) other_gd = [gdf] + [cudf.from_pandas(o) for o in other] diff --git a/python/cudf/cudf/tests/test_csv.py b/python/cudf/cudf/tests/test_csv.py index ac772c47e3a..e18112d03ea 100644 --- a/python/cudf/cudf/tests/test_csv.py +++ b/python/cudf/cudf/tests/test_csv.py @@ -916,10 +916,10 @@ def test_csv_reader_nrows(tmpdir): str(fname), dtype=dtypes, skiprows=skip_rows + 1, nrows=read_rows ) assert df.shape == (read_rows, 2) - assert str(skip_rows) in list(df)[0] + assert str(skip_rows) in next(iter(df)) assert str(2 * skip_rows) in list(df)[1] for row in range(0, read_rows // sample_skip, sample_skip): - assert df[list(df)[0]][row] == row + skip_rows + 1 + assert df[next(iter(df))][row] == row + skip_rows + 1 assert df[list(df)[1]][row] == 2 * (row + skip_rows + 1) assert df[list(df)[1]][read_rows - 1] == 2 * (read_rows + skip_rows) diff --git a/python/cudf/cudf/tests/test_cuda_array_interface.py b/python/cudf/cudf/tests/test_cuda_array_interface.py index 29f2f46e3c7..381ca45de31 100644 --- a/python/cudf/cudf/tests/test_cuda_array_interface.py +++ b/python/cudf/cudf/tests/test_cuda_array_interface.py @@ -170,7 +170,7 @@ def test_column_from_ephemeral_cupy_try_lose_reference(): # CuPy array a = cudf.Series(cupy.asarray([1, 2, 3]))._column a = cudf.core.column.as_column(a) - b = cupy.asarray([1, 1, 1]) # noqa: F841 + b = cupy.asarray([1, 1, 1]) assert_eq(pd.Index([1, 2, 3]), a.to_pandas()) a = cudf.Series(cupy.asarray([1, 2, 3]))._column diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py index 509ee0d65a5..d04fd97dcbd 100644 --- a/python/cudf/cudf/tests/test_dataframe.py +++ b/python/cudf/cudf/tests/test_dataframe.py @@ -56,9 +56,9 @@ # If spilling is enabled globally, we skip many test permutations # to reduce running time. if get_global_manager() is not None: - ALL_TYPES = ["float32"] # noqa: F811 - DATETIME_TYPES = ["datetime64[ms]"] # noqa: F811 - NUMERIC_TYPES = ["float32"] # noqa: F811 + ALL_TYPES = ["float32"] + DATETIME_TYPES = ["datetime64[ms]"] + NUMERIC_TYPES = ["float32"] # To save time, we skip tests marked "xfail" pytest_xfail = pytest.mark.skipif @@ -452,8 +452,8 @@ def test_dataframe_basic(): df = cudf.concat([df, df2]) assert len(df) == 11 - hkeys = np.asarray(np.arange(10, dtype=np.float64).tolist() + [123]) - hvals = np.asarray(rnd_vals.tolist() + [321]) + hkeys = np.asarray([*np.arange(10, dtype=np.float64).tolist(), 123]) + hvals = np.asarray([*rnd_vals.tolist(), 321]) np.testing.assert_equal(df["keys"].to_numpy(), hkeys) np.testing.assert_equal(df["vals"].to_numpy(), hvals) @@ -1118,7 +1118,7 @@ def test_dataframe_to_string_wide(monkeypatch): 1 1 1 1 1 1 1 1 1 ... 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 ... 2 2 2 2 2 2 2 2 - [3 rows x 100 columns]""" # noqa: E501 + [3 rows x 100 columns]""" ) assert got == expect @@ -2197,7 +2197,7 @@ def test_dataframe_shape_empty(): @pytest.mark.parametrize("num_cols", [1, 2, 10]) @pytest.mark.parametrize("num_rows", [1, 2, 20]) -@pytest.mark.parametrize("dtype", dtypes + ["object"]) +@pytest.mark.parametrize("dtype", [*dtypes, "object"]) @pytest.mark.parametrize("nulls", ["none", "some", "all"]) def test_dataframe_transpose(nulls, num_cols, num_rows, dtype): # In case of `bool` dtype: pandas <= 1.2.5 type-casts @@ -2842,7 +2842,7 @@ def test_arrow_round_trip(preserve_index, index): assert_eq(gdf_out, pdf_out) -@pytest.mark.parametrize("dtype", NUMERIC_TYPES + ["bool"]) +@pytest.mark.parametrize("dtype", [*NUMERIC_TYPES, "bool"]) def test_cuda_array_interface(dtype): np_data = np.arange(10).astype(dtype) cupy_data = cupy.array(np_data) @@ -3707,7 +3707,7 @@ def test_sort_index_axis_1_ignore_index_true_columnaccessor_state_names(): assert result._data.names == tuple(result._data.keys()) -@pytest.mark.parametrize("dtype", dtypes + ["category"]) +@pytest.mark.parametrize("dtype", [*dtypes, "category"]) def test_dataframe_0_row_dtype(dtype): if dtype == "category": data = pd.Series(["a", "b", "c", "d", "e"], dtype="category") @@ -7910,10 +7910,10 @@ def test_dataframe_concat_dataframe_lists(df, other, sort, ignore_index): with _hide_concat_empty_dtype_warning(): expected = pd.concat( - [pdf] + other_pd, sort=sort, ignore_index=ignore_index + [pdf, *other_pd], sort=sort, ignore_index=ignore_index ) actual = cudf.concat( - [gdf] + other_gd, sort=sort, ignore_index=ignore_index + [gdf, *other_gd], sort=sort, ignore_index=ignore_index ) # In some cases, Pandas creates an empty Index([], dtype="object") for @@ -8026,10 +8026,10 @@ def test_dataframe_concat_lists(df, other, sort, ignore_index): with _hide_concat_empty_dtype_warning(): expected = pd.concat( - [pdf] + other_pd, sort=sort, ignore_index=ignore_index + [pdf, *other_pd], sort=sort, ignore_index=ignore_index ) actual = cudf.concat( - [gdf] + other_gd, sort=sort, ignore_index=ignore_index + [gdf, *other_gd], sort=sort, ignore_index=ignore_index ) if expected.shape != df.shape: @@ -10892,7 +10892,7 @@ def test_dataframe_from_ndarray_dup_columns(): @pytest.mark.parametrize("contains", ["a", 0, None, np.nan, cudf.NA]) @pytest.mark.parametrize("other_names", [[], ["b", "c"], [1, 2]]) def test_dataframe_contains(name, contains, other_names): - column_names = [name] + other_names + column_names = [name, *other_names] gdf = cudf.DataFrame({c: [0] for c in column_names}) pdf = pd.DataFrame({c: [0] for c in column_names}) diff --git a/python/cudf/cudf/tests/test_feather.py b/python/cudf/cudf/tests/test_feather.py index f93bd2c5d32..6a9dd4c4a66 100644 --- a/python/cudf/cudf/tests/test_feather.py +++ b/python/cudf/cudf/tests/test_feather.py @@ -16,7 +16,7 @@ @pytest.fixture(params=[0, 1, 10, 100]) def pdf(request): rng = np.random.default_rng(seed=0) - types = NUMERIC_TYPES + ["bool"] + types = [*NUMERIC_TYPES, "bool"] nrows = request.param # Create a pandas dataframe with random data of mixed types diff --git a/python/cudf/cudf/tests/test_groupby.py b/python/cudf/cudf/tests/test_groupby.py index b2da9bf8e33..b93a098264f 100644 --- a/python/cudf/cudf/tests/test_groupby.py +++ b/python/cudf/cudf/tests/test_groupby.py @@ -1221,7 +1221,7 @@ def test_groupby_column_numeral(): pd.Series([0, 2, 0]), pd.Series([0, 2, 0], index=[0, 2, 1]), ], -) # noqa: E501 +) def test_groupby_external_series(series): pdf = pd.DataFrame({"x": [1.0, 2.0, 3.0], "y": [1, 2, 1]}) gdf = DataFrame.from_pandas(pdf) @@ -2015,8 +2015,10 @@ def test_multi_agg(): @pytest.mark.parametrize( "agg", ( - list(itertools.combinations(["count", "max", "min", "nunique"], 2)) - + [ + [ + *list( + itertools.combinations(["count", "max", "min", "nunique"], 2) + ), {"b": "min", "c": "mean"}, {"b": "max", "c": "mean"}, {"b": "count", "c": "mean"}, diff --git a/python/cudf/cudf/tests/test_hdf.py b/python/cudf/cudf/tests/test_hdf.py index 430ed973f19..4921b7b51fc 100644 --- a/python/cudf/cudf/tests/test_hdf.py +++ b/python/cudf/cudf/tests/test_hdf.py @@ -16,7 +16,7 @@ @pytest.fixture(params=[0, 1, 10, 100]) def pdf(request): - types = set(NUMERIC_TYPES + ["datetime64[ns]"] + ["bool"]) - set( + types = set([*NUMERIC_TYPES, "datetime64[ns]", "bool"]) - set( UNSIGNED_TYPES ) typer = {"col_" + val: val for val in types} diff --git a/python/cudf/cudf/tests/test_index.py b/python/cudf/cudf/tests/test_index.py index 24d42d9eb4c..11f6d687931 100644 --- a/python/cudf/cudf/tests/test_index.py +++ b/python/cudf/cudf/tests/test_index.py @@ -1274,7 +1274,7 @@ def test_index_append_list(data, other): @pytest.mark.parametrize("data", [[1, 2, 3, 4], []]) @pytest.mark.parametrize( - "dtype", NUMERIC_TYPES + ["str", "category", "datetime64[ns]"] + "dtype", [*NUMERIC_TYPES, "str", "category", "datetime64[ns]"] ) @pytest.mark.parametrize("name", [1, "a", None]) def test_index_basic(data, dtype, name): @@ -1399,7 +1399,7 @@ def test_multiindex_append(data, other): @pytest.mark.parametrize("data", [[1, 2, 3, 4], []]) @pytest.mark.parametrize( - "dtype", NUMERIC_TYPES + ["str", "category", "datetime64[ns]"] + "dtype", [*NUMERIC_TYPES, "str", "category", "datetime64[ns]"] ) def test_index_empty(data, dtype): pdi = pd.Index(data, dtype=dtype) @@ -1410,7 +1410,7 @@ def test_index_empty(data, dtype): @pytest.mark.parametrize("data", [[1, 2, 3, 4], []]) @pytest.mark.parametrize( - "dtype", NUMERIC_TYPES + ["str", "category", "datetime64[ns]"] + "dtype", [*NUMERIC_TYPES, "str", "category", "datetime64[ns]"] ) def test_index_size(data, dtype): pdi = pd.Index(data, dtype=dtype) @@ -1421,7 +1421,7 @@ def test_index_size(data, dtype): @pytest.mark.parametrize("data", [[1, 2, 3, 1, 2, 3, 4], [], [1], [1, 2, 3]]) @pytest.mark.parametrize( - "dtype", NUMERIC_TYPES + ["str", "category", "datetime64[ns]"] + "dtype", [*NUMERIC_TYPES, "str", "category", "datetime64[ns]"] ) def test_index_drop_duplicates(data, dtype): pdi = pd.Index(data, dtype=dtype) @@ -1437,7 +1437,7 @@ def test_dropna_bad_how(): @pytest.mark.parametrize("data", [[1, 2, 3, 1, 2, 3, 4], []]) @pytest.mark.parametrize( - "dtype", NUMERIC_TYPES + ["str", "category", "datetime64[ns]"] + "dtype", [*NUMERIC_TYPES, "str", "category", "datetime64[ns]"] ) def test_index_tolist(data, dtype): gdi = cudf.Index(data, dtype=dtype) @@ -1455,7 +1455,7 @@ def test_index_tolist(data, dtype): @pytest.mark.parametrize("data", [[], [1], [1, 2, 3]]) @pytest.mark.parametrize( - "dtype", NUMERIC_TYPES + ["str", "category", "datetime64[ns]"] + "dtype", [*NUMERIC_TYPES, "str", "category", "datetime64[ns]"] ) def test_index_iter_error(data, dtype): gdi = cudf.Index(data, dtype=dtype) @@ -1473,7 +1473,7 @@ def test_index_iter_error(data, dtype): @pytest.mark.parametrize("data", [[], [1], [1, 2, 3, 4, 5]]) @pytest.mark.parametrize( - "dtype", NUMERIC_TYPES + ["str", "category", "datetime64[ns]"] + "dtype", [*NUMERIC_TYPES, "str", "category", "datetime64[ns]"] ) def test_index_values_host(data, dtype): gdi = cudf.Index(data, dtype=dtype) diff --git a/python/cudf/cudf/tests/test_joining.py b/python/cudf/cudf/tests/test_joining.py index f6941ce7fae..f8e61651f37 100644 --- a/python/cudf/cudf/tests/test_joining.py +++ b/python/cudf/cudf/tests/test_joining.py @@ -1527,7 +1527,7 @@ def test_categorical_typecast_outer(): result = left.merge(right, how="outer", on="key") -@pytest.mark.parametrize("dtype", NUMERIC_TYPES + ["str"]) +@pytest.mark.parametrize("dtype", [*NUMERIC_TYPES, "str"]) def test_categorical_typecast_inner_one_cat(dtype): data = np.array([1, 2, 3], dtype=dtype) @@ -1538,7 +1538,7 @@ def test_categorical_typecast_inner_one_cat(dtype): assert result["key"].dtype == left["key"].dtype.categories.dtype -@pytest.mark.parametrize("dtype", NUMERIC_TYPES + ["str"]) +@pytest.mark.parametrize("dtype", [*NUMERIC_TYPES, "str"]) def test_categorical_typecast_left_one_cat(dtype): data = np.array([1, 2, 3], dtype=dtype) @@ -1549,7 +1549,7 @@ def test_categorical_typecast_left_one_cat(dtype): assert result["key"].dtype == left["key"].dtype -@pytest.mark.parametrize("dtype", NUMERIC_TYPES + ["str"]) +@pytest.mark.parametrize("dtype", [*NUMERIC_TYPES, "str"]) def test_categorical_typecast_outer_one_cat(dtype): data = np.array([1, 2, 3], dtype=dtype) diff --git a/python/cudf/cudf/tests/test_json.py b/python/cudf/cudf/tests/test_json.py index b48be6b2c2f..aaa8d7d07ee 100644 --- a/python/cudf/cudf/tests/test_json.py +++ b/python/cudf/cudf/tests/test_json.py @@ -58,12 +58,14 @@ def gdf(pdf): @pytest.fixture(params=[0, 1, 10, 100]) def gdf_writer_types(request): # datetime64[us], datetime64[ns] are unsupported due to a bug in parser - types = ( - NUMERIC_TYPES - + ["datetime64[s]", "datetime64[ms]"] - + TIMEDELTA_TYPES - + ["bool", "str"] - ) + types = [ + *NUMERIC_TYPES, + "datetime64[s]", + "datetime64[ms]", + *TIMEDELTA_TYPES, + "bool", + "str", + ] typer = {"col_" + val: val for val in types} ncols = len(types) nrows = request.param diff --git a/python/cudf/cudf/tests/test_orc.py b/python/cudf/cudf/tests/test_orc.py index 41c1c3ccb20..c4b4ef60184 100644 --- a/python/cudf/cudf/tests/test_orc.py +++ b/python/cudf/cudf/tests/test_orc.py @@ -606,7 +606,7 @@ def normalized_equals(value1, value2): def test_orc_write_statistics(tmpdir, datadir, nrows, stats_freq): from pyarrow import orc - supported_stat_types = supported_numpy_dtypes + ["str"] + supported_stat_types = [*supported_numpy_dtypes, "str"] # Writing bool columns to multiple row groups is disabled # until #6763 is fixed if nrows == 100000: @@ -681,7 +681,7 @@ def test_orc_write_statistics(tmpdir, datadir, nrows, stats_freq): def test_orc_chunked_write_statistics(tmpdir, datadir, nrows, stats_freq): from pyarrow import orc - supported_stat_types = supported_numpy_dtypes + ["str"] + supported_stat_types = [*supported_numpy_dtypes, "str"] # Writing bool columns to multiple row groups is disabled # until #6763 is fixed if nrows == 200000: diff --git a/python/cudf/cudf/tests/test_parquet.py b/python/cudf/cudf/tests/test_parquet.py index 659d2ebd89a..de3636f7526 100644 --- a/python/cudf/cudf/tests/test_parquet.py +++ b/python/cudf/cudf/tests/test_parquet.py @@ -2313,7 +2313,7 @@ def test_parquet_writer_criteo(tmpdir): cont_names = ["I" + str(x) for x in range(1, 14)] cat_names = ["C" + str(x) for x in range(1, 27)] - cols = ["label"] + cont_names + cat_names + cols = ["label", *cont_names, *cat_names] df = cudf.read_csv(fname, sep="\t", names=cols, byte_range=(0, 1000000000)) df = df.drop(columns=cont_names) diff --git a/python/cudf/cudf/tests/test_replace.py b/python/cudf/cudf/tests/test_replace.py index d9f4ceaf3f7..8ea0d205e8b 100644 --- a/python/cudf/cudf/tests/test_replace.py +++ b/python/cudf/cudf/tests/test_replace.py @@ -31,7 +31,7 @@ [ cudf.Series([5, 1, 2, 3, None, 243, None, 4]), cudf.Series(["one", "two", "three", None, "one"], dtype="category"), - cudf.Series(list(range(400)) + [None]), + cudf.Series([*list(range(400)), None]), ], ) @pytest.mark.parametrize( @@ -128,7 +128,7 @@ def test_series_replace(): assert_eq(a8, sr8.to_numpy()) # large input containing null - sr9 = cudf.Series(list(range(400)) + [None]) + sr9 = cudf.Series([*list(range(400)), None]) sr10 = sr9.replace([22, 323, 27, 0], None) assert sr10.null_count == 5 assert len(sr10.dropna().to_numpy()) == (401 - 5) diff --git a/python/cudf/cudf/tests/test_reshape.py b/python/cudf/cudf/tests/test_reshape.py index 53fe5f7f30d..5cebdf37c9f 100644 --- a/python/cudf/cudf/tests/test_reshape.py +++ b/python/cudf/cudf/tests/test_reshape.py @@ -28,9 +28,9 @@ # If spilling is enabled globally, we skip many test permutations # to reduce running time. if get_global_manager() is not None: - ALL_TYPES = ["float32"] # noqa: F811 - DATETIME_TYPES = ["datetime64[ms]"] # noqa: F811 - NUMERIC_TYPES = ["float32"] # noqa: F811 + ALL_TYPES = ["float32"] + DATETIME_TYPES = ["datetime64[ms]"] + NUMERIC_TYPES = ["float32"] # To save time, we skip tests marked "pytest.mark.xfail" pytest_xfail = pytest.mark.skipif diff --git a/python/cudf/cudf/tests/test_series.py b/python/cudf/cudf/tests/test_series.py index 7f0a4902ed1..6c0e5099f68 100644 --- a/python/cudf/cudf/tests/test_series.py +++ b/python/cudf/cudf/tests/test_series.py @@ -282,8 +282,8 @@ def test_series_concat_list_series_with_index(data, others, ignore_index): other_ps = others other_gs = [cudf.from_pandas(obj) for obj in others] - expected = pd.concat([psr] + other_ps, ignore_index=ignore_index) - actual = cudf.concat([gsr] + other_gs, ignore_index=ignore_index) + expected = pd.concat([psr, *other_ps], ignore_index=ignore_index) + actual = cudf.concat([gsr, *other_gs], ignore_index=ignore_index) assert_eq(expected, actual) @@ -1942,7 +1942,7 @@ def test_diff_many_dtypes(data): @pytest.mark.parametrize("num_rows", [1, 100]) @pytest.mark.parametrize("num_bins", [1, 10]) @pytest.mark.parametrize("right", [True, False]) -@pytest.mark.parametrize("dtype", NUMERIC_TYPES + ["bool"]) +@pytest.mark.parametrize("dtype", [*NUMERIC_TYPES, "bool"]) @pytest.mark.parametrize("series_bins", [True, False]) def test_series_digitize(num_rows, num_bins, right, dtype, series_bins): rng = np.random.default_rng(seed=0) diff --git a/python/cudf/cudf/tests/test_setitem.py b/python/cudf/cudf/tests/test_setitem.py index 5406836ba61..6119fda0752 100644 --- a/python/cudf/cudf/tests/test_setitem.py +++ b/python/cudf/cudf/tests/test_setitem.py @@ -472,7 +472,7 @@ def test_loc_setitem_series_index_alignment_13031(other_index): ), ], ) -@pytest.mark.parametrize("arg", list(range(-20, 20)) + [5.6, 3.1]) +@pytest.mark.parametrize("arg", [*list(range(-20, 20)), 5.6, 3.1]) def test_series_set_item_range_index(ps, arg): gsr = cudf.from_pandas(ps) psr = ps.copy(deep=True) diff --git a/python/cudf/cudf/tests/test_spilling.py b/python/cudf/cudf/tests/test_spilling.py index 7af83a99d60..13d98e43ddc 100644 --- a/python/cudf/cudf/tests/test_spilling.py +++ b/python/cudf/cudf/tests/test_spilling.py @@ -669,7 +669,7 @@ def test_statistics_expose(manager: SpillManager): # Expose the first buffer buffers[0].owner.mark_exposed() assert len(manager.statistics.exposes) == 1 - stat = list(manager.statistics.exposes.values())[0] + stat = next(iter(manager.statistics.exposes.values())) assert stat.count == 1 assert stat.total_nbytes == buffers[0].nbytes assert stat.spilled_nbytes == 0 diff --git a/python/cudf/cudf/tests/test_string.py b/python/cudf/cudf/tests/test_string.py index 9700f548a16..bdc9e695844 100644 --- a/python/cudf/cudf/tests/test_string.py +++ b/python/cudf/cudf/tests/test_string.py @@ -536,8 +536,8 @@ def test_string_cat(ps_gs, others, sep, na_rep, index): assert_eq(expect, got) - expect = ps.str.cat(others=[ps.index] + [ps.index], sep=sep, na_rep=na_rep) - got = gs.str.cat(others=[gs.index] + [gs.index], sep=sep, na_rep=na_rep) + expect = ps.str.cat(others=[ps.index, ps.index], sep=sep, na_rep=na_rep) + got = gs.str.cat(others=[gs.index, gs.index], sep=sep, na_rep=na_rep) assert_eq(expect, got) diff --git a/python/cudf/cudf/utils/ioutils.py b/python/cudf/cudf/utils/ioutils.py index 86ed749772f..e1ab75b6b33 100644 --- a/python/cudf/cudf/utils/ioutils.py +++ b/python/cudf/cudf/utils/ioutils.py @@ -822,7 +822,7 @@ >>> cudf.read_json(json_str, engine='cudf', lines=True, dtype={'k1':float, 'k2':cudf.ListDtype(int)}) k1 k2 0 1.0 [1] -""" # noqa: E501 +""" doc_read_json: Callable = docfmt_partial(docstring=_docstring_read_json) _docstring_to_json = """ diff --git a/python/cudf/cudf/utils/queryutils.py b/python/cudf/cudf/utils/queryutils.py index 78aeac425f7..8966789fee8 100644 --- a/python/cudf/cudf/utils/queryutils.py +++ b/python/cudf/cudf/utils/queryutils.py @@ -64,7 +64,7 @@ def query_parser(text): Returns ------- info: a `dict` of the parsed info - """ # noqa + """ # convert any '@' to text = text.replace("@", ENVREF_PREFIX) tree = ast.parse(text) @@ -249,7 +249,7 @@ def query_execute(df, expr, callenv): nrows = len(df) out = column_empty(nrows, dtype=np.bool_) # run kernel - args = [out] + colarrays + envargs + args = [out, *colarrays, *envargs] with _CUDFNumbaConfig(): kernel.forall(nrows)(*args) out_mask = applyutils.make_aggregate_nullmask(df, columns=columns) diff --git a/python/cudf/cudf/utils/utils.py b/python/cudf/cudf/utils/utils.py index e6d252b8807..c83c1cbe895 100644 --- a/python/cudf/cudf/utils/utils.py +++ b/python/cudf/cudf/utils/utils.py @@ -210,7 +210,7 @@ class GetAttrGetItemMixin: # Tracking of protected keys by each subclass is necessary to make the # `__getattr__`->`__getitem__` call safe. See - # https://nedbatchelder.com/blog/201010/surprising_getattr_recursion.html # noqa: E501 + # https://nedbatchelder.com/blog/201010/surprising_getattr_recursion.html # for an explanation. In brief, defining the `_PROTECTED_KEYS` allows this # class to avoid calling `__getitem__` inside `__getattr__` when # `__getitem__` will internally again call `__getattr__`, resulting in an diff --git a/python/dask_cudf/dask_cudf/__init__.py b/python/dask_cudf/dask_cudf/__init__.py index cc17e71039a..20eb2404b77 100644 --- a/python/dask_cudf/dask_cudf/__init__.py +++ b/python/dask_cudf/dask_cudf/__init__.py @@ -3,15 +3,15 @@ import warnings from importlib import import_module -from dask import config import dask.dataframe as dd -from dask.dataframe import from_delayed # noqa: E402 +from dask import config +from dask.dataframe import from_delayed -import cudf # noqa: E402 +import cudf -from . import backends # noqa: E402, F401 -from ._version import __git_commit__, __version__ # noqa: E402, F401 -from .core import concat, from_cudf, DataFrame, Index, Series # noqa: F401 +from . import backends # noqa: F401 +from ._version import __git_commit__, __version__ # noqa: F401 +from .core import DataFrame, Index, Series, concat, from_cudf QUERY_PLANNING_ON = dd.DASK_EXPR_ENABLED @@ -56,17 +56,17 @@ def inner_func(*args, **kwargs): if QUERY_PLANNING_ON: + from . import io from ._expr.expr import _patch_dask_expr - from . import io # noqa: F401 groupby_agg = _deprecated_api("dask_cudf.groupby_agg") read_text = DataFrame.read_text _patch_dask_expr() else: + from . import io # noqa: F401 from ._legacy.groupby import groupby_agg # noqa: F401 from ._legacy.io import read_text # noqa: F401 - from . import io # noqa: F401 to_orc = _deprecated_api( @@ -78,10 +78,10 @@ def inner_func(*args, **kwargs): __all__ = [ "DataFrame", - "Series", "Index", - "from_cudf", + "Series", "concat", + "from_cudf", "from_delayed", ] diff --git a/python/dask_cudf/dask_cudf/_expr/collection.py b/python/dask_cudf/dask_cudf/_expr/collection.py index fdf7d8630e9..0071981efd2 100644 --- a/python/dask_cudf/dask_cudf/_expr/collection.py +++ b/python/dask_cudf/dask_cudf/_expr/collection.py @@ -217,8 +217,9 @@ def _create_array_collection_with_meta(expr): name = result._name meta = result._meta divisions = result.divisions - chunks = ((np.nan,) * (len(divisions) - 1),) + tuple( - (d,) for d in meta.shape[1:] + chunks = ( + (np.nan,) * (len(divisions) - 1), + *tuple((d,) for d in meta.shape[1:]), ) if len(chunks) > 1: if isinstance(dsk, HighLevelGraph): @@ -228,11 +229,11 @@ def _create_array_collection_with_meta(expr): layer = dsk if isinstance(layer, Blockwise): layer.new_axes["j"] = chunks[1][0] - layer.output_indices = layer.output_indices + ("j",) + layer.output_indices = (*layer.output_indices, "j") else: suffix = (0,) * (len(chunks) - 1) for i in range(len(chunks[0])): - layer[(name, i) + suffix] = layer.pop((name, i)) + layer[(name, i, *suffix)] = layer.pop((name, i)) return da.Array(dsk, name=name, chunks=chunks, meta=meta) diff --git a/python/dask_cudf/dask_cudf/core.py b/python/dask_cudf/dask_cudf/core.py index 7d6d5c05cbe..5fd217209ec 100644 --- a/python/dask_cudf/dask_cudf/core.py +++ b/python/dask_cudf/dask_cudf/core.py @@ -10,7 +10,7 @@ # This module provides backward compatibility for legacy import patterns. if dd.DASK_EXPR_ENABLED: - from dask_cudf._expr.collection import ( # noqa: E402 + from dask_cudf._expr.collection import ( DataFrame, Index, Series, @@ -19,7 +19,7 @@ from dask_cudf._legacy.core import DataFrame, Index, Series # noqa: F401 -concat = dd.concat # noqa: F401 +concat = dd.concat @_dask_cudf_performance_tracking diff --git a/python/dask_cudf/dask_cudf/io/__init__.py b/python/dask_cudf/dask_cudf/io/__init__.py index 212951336c9..9bca33e414a 100644 --- a/python/dask_cudf/dask_cudf/io/__init__.py +++ b/python/dask_cudf/dask_cudf/io/__init__.py @@ -1,9 +1,8 @@ # Copyright (c) 2024, NVIDIA CORPORATION. -from dask_cudf import _deprecated_api, QUERY_PLANNING_ON - -from . import csv, orc, json, parquet, text # noqa: F401 +from dask_cudf import QUERY_PLANNING_ON, _deprecated_api +from . import csv, json, orc, parquet, text # noqa: F401 read_csv = _deprecated_api( "dask_cudf.io.read_csv", new_api="dask_cudf.read_csv" diff --git a/python/dask_cudf/dask_cudf/io/parquet.py b/python/dask_cudf/dask_cudf/io/parquet.py index bbedd046760..acb52e98680 100644 --- a/python/dask_cudf/dask_cudf/io/parquet.py +++ b/python/dask_cudf/dask_cudf/io/parquet.py @@ -31,7 +31,7 @@ from dask_cudf import QUERY_PLANNING_ON, _deprecated_api # Dask-expr imports CudfEngine from this module -from dask_cudf._legacy.io.parquet import CudfEngine # noqa: F401 +from dask_cudf._legacy.io.parquet import CudfEngine if TYPE_CHECKING: from collections.abc import MutableMapping diff --git a/python/dask_cudf/dask_cudf/tests/test_groupby.py b/python/dask_cudf/dask_cudf/tests/test_groupby.py index 918290aa6fa..9bd3b506db0 100644 --- a/python/dask_cudf/dask_cudf/tests/test_groupby.py +++ b/python/dask_cudf/dask_cudf/tests/test_groupby.py @@ -58,7 +58,7 @@ def pdf(request): # deprecation check for "collect". @pytest.mark.parametrize( "aggregation", - sorted(tuple(set(OPTIMIZED_AGGS) - {list}) + ("collect",)), + sorted((*tuple(set(OPTIMIZED_AGGS) - {list}), "collect")), ) @pytest.mark.parametrize("series", [False, True]) def test_groupby_basic(series, aggregation, pdf): diff --git a/python/libcudf/libcudf/__init__.py b/python/libcudf/libcudf/__init__.py index 10c476cbe89..4077fa8fbf9 100644 --- a/python/libcudf/libcudf/__init__.py +++ b/python/libcudf/libcudf/__init__.py @@ -14,3 +14,5 @@ from libcudf._version import __git_commit__, __version__ from libcudf.load import load_library + +__all__ = ["__git_commit__", "__version__", "load_library"] diff --git a/python/pylibcudf/pylibcudf/__init__.py b/python/pylibcudf/pylibcudf/__init__.py index 62a2170f83e..8ea176a6b07 100644 --- a/python/pylibcudf/pylibcudf/__init__.py +++ b/python/pylibcudf/pylibcudf/__init__.py @@ -65,8 +65,8 @@ "aggregation", "binaryop", "column_factories", - "contiguous_split", "concatenate", + "contiguous_split", "copying", "datetime", "experimental", @@ -83,6 +83,7 @@ "lists", "merge", "null_mask", + "nvtext", "partitioning", "quantiles", "reduce", @@ -91,13 +92,12 @@ "rolling", "round", "search", + "sorting", "stream_compaction", "strings", - "sorting", "traits", "transform", "transpose", "types", "unary", - "nvtext", ] diff --git a/python/pylibcudf/pylibcudf/nvtext/__init__.py b/python/pylibcudf/pylibcudf/nvtext/__init__.py index 4f125d3a733..d88a7d4b825 100644 --- a/python/pylibcudf/pylibcudf/nvtext/__init__.py +++ b/python/pylibcudf/pylibcudf/nvtext/__init__.py @@ -15,11 +15,11 @@ ) __all__ = [ + "byte_pair_encode", "edit_distance", "generate_ngrams", "jaccard", "minhash", - "byte_pair_encode", "ngrams_tokenize", "normalize", "replace", diff --git a/python/pylibcudf/pylibcudf/tests/io/test_csv.py b/python/pylibcudf/pylibcudf/tests/io/test_csv.py index 1cbaac57315..555ca2fb02c 100644 --- a/python/pylibcudf/pylibcudf/tests/io/test_csv.py +++ b/python/pylibcudf/pylibcudf/tests/io/test_csv.py @@ -281,7 +281,7 @@ def test_read_csv_header(csv_table_data, source_or_sink, header): new_tbl_dict = {} for i, (name, vals) in enumerate(tbl_dict.items()): str_vals = [str(val) for val in vals] - new_tbl_dict[str(i)] = [name] + str_vals + new_tbl_dict[str(i)] = [name, *str_vals] pa_table = pa.table(new_tbl_dict) assert_table_and_meta_eq(