Skip to content

Commit

Permalink
Turn on RUF rules and apply fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
wence- committed Nov 27, 2024
1 parent edaf904 commit 32479ca
Show file tree
Hide file tree
Showing 71 changed files with 239 additions and 230 deletions.
20 changes: 14 additions & 6 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -95,25 +95,33 @@ select = [
"UP035",
# usage of legacy `np.random` function calls
"NPY002",
# Ruff-specific rules
"RUF",
]
ignore = [
# whitespace before :
"E203",
# line-too-long (due to Copyright header)
"E501",
# String contains ambiguous character
"RUF001",
# Parenthesize `a and b` expressions when chaining `and` and `or`
# together, to make the precedence clear
"RUF021",
# Mutable class attributes should be annotated with
# `typing.ClassVar`
"RUF012",
]
fixable = ["ALL"]
exclude = [
# TODO: Remove this in a follow-up where we fix __all__.
"__init__.py",
# TODO: https://github.com/rapidsai/cudf/issues/17461
"**/*.ipynb",
# TODO: https://github.com/rapidsai/cudf/issues/17461
"**/*.ipynb",
]

[tool.ruff.format]
exclude = [
# TODO: https://github.com/rapidsai/cudf/issues/17461
"**/*.ipynb",
# TODO: https://github.com/rapidsai/cudf/issues/17461
"**/*.ipynb",
]

[tool.ruff.lint.per-file-ignores]
Expand Down
4 changes: 2 additions & 2 deletions python/cudf/benchmarks/common/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,9 +42,9 @@ def pytest_collection_modifyitems(session, config, items):
items[:] = list(filter(is_pandas_compatible, items))

else:
import cupy # noqa: W0611, F401
import cupy # noqa: F401

import cudf # noqa: W0611, F401
import cudf # noqa: F401

def pytest_collection_modifyitems(session, config, items):
pass
Expand Down
16 changes: 7 additions & 9 deletions python/cudf/benchmarks/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,18 +56,16 @@
# into the main repo.
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "common"))

# Turn off isort until we upgrade to 5.8.0
# https://github.com/pycqa/isort/issues/1594
from config import ( # noqa: W0611, E402, F401
from config import (
NUM_COLS,
NUM_ROWS,
collect_ignore,
cudf, # noqa: W0611, E402, F401
pytest_collection_modifyitems,
pytest_sessionfinish,
pytest_sessionstart,
collect_ignore, # noqa: F401
cudf,
pytest_collection_modifyitems, # noqa: F401
pytest_sessionfinish, # noqa: F401
pytest_sessionstart, # noqa: F401
)
from utils import ( # noqa: E402
from utils import (
OrderedSet,
collapse_fixtures,
column_generators,
Expand Down
2 changes: 1 addition & 1 deletion python/cudf/cudf/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@


__all__ = [
"NA",
"BaseIndex",
"CategoricalDtype",
"CategoricalIndex",
Expand All @@ -114,7 +115,6 @@
"IntervalIndex",
"ListDtype",
"MultiIndex",
"NA",
"NaT",
"RangeIndex",
"Scalar",
Expand Down
2 changes: 1 addition & 1 deletion python/cudf/cudf/_fuzz_testing/fuzzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ def start(self):
else:
self._data_handler.set_rand_params(self.params)
kwargs = self._data_handler._current_params["test_kwargs"]
logging.info(f"Parameters passed: {str(kwargs)}")
logging.info(f"Parameters passed: {kwargs!s}")
self._target(file_name, **kwargs)
except KeyboardInterrupt:
logging.info(
Expand Down
6 changes: 3 additions & 3 deletions python/cudf/cudf/core/_base_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,7 @@ def memory_usage(self, deep=False):
"""
raise NotImplementedError

def tolist(self): # noqa: D102
def tolist(self):
raise TypeError(
"cuDF does not support conversion to host memory "
"via the `tolist()` method. Consider using "
Expand All @@ -148,7 +148,7 @@ def name(self):
raise NotImplementedError

@property # type: ignore
def ndim(self) -> int: # noqa: D401
def ndim(self) -> int:
"""Number of dimensions of the underlying data, by definition 1."""
return 1

Expand Down Expand Up @@ -265,7 +265,7 @@ def get_loc(self, key):
slice(1, 3, None)
>>> multi_index.get_loc(('b', 'e'))
1
""" # noqa: E501
"""

def max(self):
"""The maximum value of the index."""
Expand Down
4 changes: 2 additions & 2 deletions python/cudf/cudf/core/buffer/spill_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ def get_rmm_memory_resource_stack(
"""

if hasattr(mr, "upstream_mr"):
return [mr] + get_rmm_memory_resource_stack(mr.upstream_mr)
return [mr, *get_rmm_memory_resource_stack(mr.upstream_mr)]
return [mr]


Expand Down Expand Up @@ -275,7 +275,7 @@ def _out_of_memory_handle(self, nbytes: int, *, retry_once=True) -> bool:
print(
f"[WARNING] RMM allocation of {format_bytes(nbytes)} bytes "
"failed, spill-on-demand couldn't find any device memory to "
f"spill:\n{repr(self)}\ntraceback:\n{get_traceback()}\n"
f"spill:\n{self!r}\ntraceback:\n{get_traceback()}\n"
f"{self.statistics}"
)
return False # Since we didn't find anything to spill, we give up
Expand Down
2 changes: 1 addition & 1 deletion python/cudf/cudf/core/buffer/spillable_buffer.py
Original file line number Diff line number Diff line change
Expand Up @@ -366,7 +366,7 @@ def __str__(self) -> str:
f"<{self.__class__.__name__} size={format_bytes(self._size)} "
f"spillable={self.spillable} exposed={self.exposed} "
f"num-spill-locks={len(self._spill_locks)} "
f"ptr={ptr_info} owner={repr(self._owner)}>"
f"ptr={ptr_info} owner={self._owner!r}>"
)


Expand Down
20 changes: 11 additions & 9 deletions python/cudf/cudf/core/column/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,17 +15,19 @@
deserialize_columns,
serialize_columns,
)
from cudf.core.column.datetime import DatetimeColumn # noqa: F401
from cudf.core.column.datetime import DatetimeTZColumn # noqa: F401
from cudf.core.column.lists import ListColumn # noqa: F401
from cudf.core.column.numerical import NumericalColumn # noqa: F401
from cudf.core.column.string import StringColumn # noqa: F401
from cudf.core.column.struct import StructColumn # noqa: F401
from cudf.core.column.timedelta import TimeDeltaColumn # noqa: F401
from cudf.core.column.interval import IntervalColumn # noqa: F401
from cudf.core.column.decimal import ( # noqa: F401
from cudf.core.column.datetime import (
DatetimeColumn,
DatetimeTZColumn,
)
from cudf.core.column.decimal import (
Decimal32Column,
Decimal64Column,
Decimal128Column,
DecimalBaseColumn,
)
from cudf.core.column.interval import IntervalColumn
from cudf.core.column.lists import ListColumn
from cudf.core.column.numerical import NumericalColumn
from cudf.core.column.string import StringColumn
from cudf.core.column.struct import StructColumn
from cudf.core.column.timedelta import TimeDeltaColumn
6 changes: 3 additions & 3 deletions python/cudf/cudf/core/column/decimal.py
Original file line number Diff line number Diff line change
Expand Up @@ -435,7 +435,7 @@ def _get_decimal_type(
`op` for the given dtypes.
For precision & scale calculations see: https://docs.microsoft.com/en-us/sql/t-sql/data-types/precision-scale-and-length-transact-sql
""" # noqa: E501
"""

# This should at some point be hooked up to libcudf's
# binary_operation_fixed_point_scale
Expand Down Expand Up @@ -506,8 +506,8 @@ def _get_decimal_type(
# if we've reached this point, we cannot create a decimal type without
# overflow; raise an informative error
raise ValueError(
f"Performing {op} between columns of type {repr(lhs_dtype)} and "
f"{repr(rhs_dtype)} would result in overflow"
f"Performing {op} between columns of type {lhs_dtype!r} and "
f"{rhs_dtype!r} would result in overflow"
)


Expand Down
2 changes: 1 addition & 1 deletion python/cudf/cudf/core/column/numerical.py
Original file line number Diff line number Diff line change
Expand Up @@ -226,7 +226,7 @@ def _binaryop(self, other: ColumnBinaryOperand, op: str) -> ColumnBase:
# If `other` is a Python integer and it is out-of-bounds,
# promotion could fail, but we can trivially define the result
# in terms of `notnull` or `NULL_NOT_EQUALS`.
if type(other) is int and self.dtype.kind in "iu": # noqa: E721
if type(other) is int and self.dtype.kind in "iu":
truthiness = None
iinfo = np.iinfo(self.dtype)
if iinfo.min > other:
Expand Down
8 changes: 4 additions & 4 deletions python/cudf/cudf/core/column/string.py
Original file line number Diff line number Diff line change
Expand Up @@ -548,7 +548,7 @@ def join(
2 <NA>
3 c-d
dtype: object
""" # noqa E501
"""
if sep is None:
sep = ""

Expand Down Expand Up @@ -694,7 +694,7 @@ def extract(
The `flags` parameter currently only supports re.DOTALL and
re.MULTILINE.
""" # noqa W605
"""
if not _is_supported_regex_flags(flags):
raise NotImplementedError(
"unsupported value for `flags` parameter"
Expand Down Expand Up @@ -830,7 +830,7 @@ def contains(
value is set.
The `flags` parameter currently only supports re.DOTALL and
re.MULTILINE.
""" # noqa W605
"""
if na is not np.nan:
raise NotImplementedError("`na` parameter is not yet supported")
if regex and isinstance(pat, re.Pattern):
Expand Down Expand Up @@ -3675,7 +3675,7 @@ def count(self, pat: str, flags: int = 0) -> SeriesOrIndex:
- Some characters need to be escaped when passing
in pat. e.g. ``'$'`` has a special meaning in regex
and must be escaped when finding this literal character.
""" # noqa W605
"""
if isinstance(pat, re.Pattern):
flags = pat.flags & ~re.U
pat = pat.pattern
Expand Down
2 changes: 1 addition & 1 deletion python/cudf/cudf/core/column/timedelta.py
Original file line number Diff line number Diff line change
Expand Up @@ -468,7 +468,7 @@ def components(self) -> dict[str, ColumnBase]:
2 13000 10 12 48 712 0 0
3 0 0 35 35 656 0 0
4 37 13 12 14 234 0 0
""" # noqa: E501
"""

date_meta = {
"seconds": ["m", "s"],
Expand Down
6 changes: 3 additions & 3 deletions python/cudf/cudf/core/column_accessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ def from_zip(cls, data: abc.Iterator):
def __getitem__(self, key):
"""Recursively apply dict.__getitem__ for nested elements."""
# As described in the pandas docs
# https://pandas.pydata.org/pandas-docs/stable/user_guide/advanced.html#advanced-indexing-with-hierarchical-index # noqa: E501
# https://pandas.pydata.org/pandas-docs/stable/user_guide/advanced.html#advanced-indexing-with-hierarchical-index
# accessing nested elements of a multiindex must be done using a tuple.
# Lists and other sequences are treated as accessing multiple elements
# at the top level of the index.
Expand All @@ -62,10 +62,10 @@ def _to_flat_dict_inner(d: dict, parents: tuple = ()):
for k, v in d.items():
if not isinstance(v, d.__class__):
if parents:
k = parents + (k,)
k = (*parents, k)
yield (k, v)
else:
yield from _to_flat_dict_inner(d=v, parents=parents + (k,))
yield from _to_flat_dict_inner(d=v, parents=(*parents, k))


class ColumnAccessor(abc.MutableMapping):
Expand Down
2 changes: 1 addition & 1 deletion python/cudf/cudf/core/cut.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@ def cut(
if len(set(bins)) is not len(bins):
if duplicates == "raise":
raise ValueError(
f"Bin edges must be unique: {repr(bins)}.\n"
f"Bin edges must be unique: {bins!r}.\n"
f"You can drop duplicate edges by setting the 'duplicates'"
"kwarg"
)
Expand Down
16 changes: 8 additions & 8 deletions python/cudf/cudf/core/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -2248,7 +2248,7 @@ def from_dict(
n1 n2
a b 1 3
c 2 4
""" # noqa: E501
"""

orient = orient.lower()
if orient == "index":
Expand Down Expand Up @@ -2405,7 +2405,7 @@ def to_dict(
>>> df.to_dict('records', into=dd)
[defaultdict(<class 'list'>, {'col1': 1, 'col2': 0.5}),
defaultdict(<class 'list'>, {'col1': 2, 'col2': 0.75})]
""" # noqa: E501
"""
orient = orient.lower()

if orient == "series":
Expand Down Expand Up @@ -3033,7 +3033,7 @@ def set_index(
if len(keys) == 0:
raise ValueError("No valid columns to be added to index.")
if append:
keys = [self.index] + keys
keys = [self.index, *keys]

# Preliminary type check
labels_not_found = []
Expand Down Expand Up @@ -3099,7 +3099,7 @@ def set_index(
@_performance_tracking
def fillna(
self, value=None, method=None, axis=None, inplace=False, limit=None
): # noqa: D102
):
if isinstance(value, (pd.Series, pd.DataFrame)):
value = cudf.from_pandas(value)
if isinstance(value, cudf.Series):
Expand Down Expand Up @@ -3580,7 +3580,7 @@ def drop_duplicates(
1 Yum Yum cup 4.0
2 Indomie cup 3.5
4 Indomie pack 5.0
""" # noqa: E501
"""
outdf = super().drop_duplicates(
subset=subset,
keep=keep,
Expand Down Expand Up @@ -4860,7 +4860,7 @@ def map(

if na_action not in {"ignore", None}:
raise ValueError(
f"na_action must be 'ignore' or None. Got {repr(na_action)}"
f"na_action must be 'ignore' or None. Got {na_action!r}"
)

if na_action == "ignore":
Expand Down Expand Up @@ -6145,7 +6145,7 @@ def quantile(
non-numeric types and result is expected to be a Series in case of
Pandas. cuDF will return a DataFrame as it doesn't support mixed
types under Series.
""" # noqa: E501
"""
if axis not in (0, None):
raise NotImplementedError("axis is not implemented yet")

Expand Down Expand Up @@ -6844,7 +6844,7 @@ def select_dtypes(self, include=None, exclude=None):
3 False 2.0
4 True 1.0
5 False 2.0
""" # noqa: E501
"""

# code modified from:
# https://github.com/pandas-dev/pandas/blob/master/pandas/core/frame.py#L3196
Expand Down
Loading

0 comments on commit 32479ca

Please sign in to comment.