Turn on RUF rules and apply fixes

rapidsai · Nov 27, 2024 · 32479ca
1 parent edaf904
commit 32479ca
Show file tree

Hide file tree

Showing 71 changed files with 239 additions and 230 deletions.
diff --git a/pyproject.toml b/pyproject.toml
@@ -95,25 +95,33 @@ select = [
     "UP035",
     # usage of legacy `np.random` function calls
     "NPY002",
+    # Ruff-specific rules
+    "RUF",
 ]
 ignore = [
     # whitespace before :
     "E203",
     # line-too-long (due to Copyright header)
     "E501",
+    # String contains ambiguous character
+    "RUF001",
+    # Parenthesize `a and b` expressions when chaining `and` and `or`
+    # together, to make the precedence clear
+    "RUF021",
+    # Mutable class attributes should be annotated with
+    # `typing.ClassVar`
+    "RUF012",
 ]
 fixable = ["ALL"]
 exclude = [
-  # TODO: Remove this in a follow-up where we fix __all__.
-  "__init__.py",
-  # TODO: https://github.com/rapidsai/cudf/issues/17461
-  "**/*.ipynb",
+    # TODO: https://github.com/rapidsai/cudf/issues/17461
+    "**/*.ipynb",
 ]
 
 [tool.ruff.format]
 exclude = [
-  # TODO: https://github.com/rapidsai/cudf/issues/17461
-  "**/*.ipynb",
+    # TODO: https://github.com/rapidsai/cudf/issues/17461
+    "**/*.ipynb",
 ]
 
 [tool.ruff.lint.per-file-ignores]

diff --git a/python/cudf/benchmarks/common/config.py b/python/cudf/benchmarks/common/config.py
@@ -42,9 +42,9 @@ def pytest_collection_modifyitems(session, config, items):
         items[:] = list(filter(is_pandas_compatible, items))
 
 else:
-    import cupy  # noqa: W0611, F401
+    import cupy  # noqa: F401
 
-    import cudf  # noqa: W0611, F401
+    import cudf  # noqa: F401
 
     def pytest_collection_modifyitems(session, config, items):
         pass

diff --git a/python/cudf/benchmarks/conftest.py b/python/cudf/benchmarks/conftest.py
@@ -56,18 +56,16 @@
 # into the main repo.
 sys.path.insert(0, os.path.join(os.path.dirname(__file__), "common"))
 
-# Turn off isort until we upgrade to 5.8.0
-# https://github.com/pycqa/isort/issues/1594
-from config import (  # noqa: W0611, E402, F401
+from config import (
     NUM_COLS,
     NUM_ROWS,
-    collect_ignore,
-    cudf,  # noqa: W0611, E402, F401
-    pytest_collection_modifyitems,
-    pytest_sessionfinish,
-    pytest_sessionstart,
+    collect_ignore,  # noqa: F401
+    cudf,
+    pytest_collection_modifyitems,  # noqa: F401
+    pytest_sessionfinish,  # noqa: F401
+    pytest_sessionstart,  # noqa: F401
 )
-from utils import (  # noqa: E402
+from utils import (
     OrderedSet,
     collapse_fixtures,
     column_generators,

diff --git a/python/cudf/cudf/__init__.py b/python/cudf/cudf/__init__.py
@@ -99,6 +99,7 @@
 
 
 __all__ = [
+    "NA",
     "BaseIndex",
     "CategoricalDtype",
     "CategoricalIndex",
@@ -114,7 +115,6 @@
     "IntervalIndex",
     "ListDtype",
     "MultiIndex",
-    "NA",
     "NaT",
     "RangeIndex",
     "Scalar",

diff --git a/python/cudf/cudf/_fuzz_testing/fuzzer.py b/python/cudf/cudf/_fuzz_testing/fuzzer.py
@@ -95,7 +95,7 @@ def start(self):
                 else:
                     self._data_handler.set_rand_params(self.params)
                     kwargs = self._data_handler._current_params["test_kwargs"]
-                    logging.info(f"Parameters passed: {str(kwargs)}")
+                    logging.info(f"Parameters passed: {kwargs!s}")
                     self._target(file_name, **kwargs)
             except KeyboardInterrupt:
                 logging.info(

diff --git a/python/cudf/cudf/core/_base_index.py b/python/cudf/cudf/core/_base_index.py
@@ -133,7 +133,7 @@ def memory_usage(self, deep=False):
         """
         raise NotImplementedError
 
-    def tolist(self):  # noqa: D102
+    def tolist(self):
         raise TypeError(
             "cuDF does not support conversion to host memory "
             "via the `tolist()` method. Consider using "
@@ -148,7 +148,7 @@ def name(self):
         raise NotImplementedError
 
     @property  # type: ignore
-    def ndim(self) -> int:  # noqa: D401
+    def ndim(self) -> int:
         """Number of dimensions of the underlying data, by definition 1."""
         return 1
 
@@ -265,7 +265,7 @@ def get_loc(self, key):
         slice(1, 3, None)
         >>> multi_index.get_loc(('b', 'e'))
         1
-        """  # noqa: E501
+        """
 
     def max(self):
         """The maximum value of the index."""

diff --git a/python/cudf/cudf/core/buffer/spill_manager.py b/python/cudf/cudf/core/buffer/spill_manager.py
@@ -54,7 +54,7 @@ def get_rmm_memory_resource_stack(
     """
 
     if hasattr(mr, "upstream_mr"):
-        return [mr] + get_rmm_memory_resource_stack(mr.upstream_mr)
+        return [mr, *get_rmm_memory_resource_stack(mr.upstream_mr)]
     return [mr]
 
 
@@ -275,7 +275,7 @@ def _out_of_memory_handle(self, nbytes: int, *, retry_once=True) -> bool:
         print(
             f"[WARNING] RMM allocation of {format_bytes(nbytes)} bytes "
             "failed, spill-on-demand couldn't find any device memory to "
-            f"spill:\n{repr(self)}\ntraceback:\n{get_traceback()}\n"
+            f"spill:\n{self!r}\ntraceback:\n{get_traceback()}\n"
             f"{self.statistics}"
         )
         return False  # Since we didn't find anything to spill, we give up

diff --git a/python/cudf/cudf/core/buffer/spillable_buffer.py b/python/cudf/cudf/core/buffer/spillable_buffer.py
@@ -366,7 +366,7 @@ def __str__(self) -> str:
             f"<{self.__class__.__name__} size={format_bytes(self._size)} "
             f"spillable={self.spillable} exposed={self.exposed} "
             f"num-spill-locks={len(self._spill_locks)} "
-            f"ptr={ptr_info} owner={repr(self._owner)}>"
+            f"ptr={ptr_info} owner={self._owner!r}>"
         )
 
 

diff --git a/python/cudf/cudf/core/column/__init__.py b/python/cudf/cudf/core/column/__init__.py
@@ -15,17 +15,19 @@
     deserialize_columns,
     serialize_columns,
 )
-from cudf.core.column.datetime import DatetimeColumn  # noqa: F401
-from cudf.core.column.datetime import DatetimeTZColumn  # noqa: F401
-from cudf.core.column.lists import ListColumn  # noqa: F401
-from cudf.core.column.numerical import NumericalColumn  # noqa: F401
-from cudf.core.column.string import StringColumn  # noqa: F401
-from cudf.core.column.struct import StructColumn  # noqa: F401
-from cudf.core.column.timedelta import TimeDeltaColumn  # noqa: F401
-from cudf.core.column.interval import IntervalColumn  # noqa: F401
-from cudf.core.column.decimal import (  # noqa: F401
+from cudf.core.column.datetime import (
+    DatetimeColumn,
+    DatetimeTZColumn,
+)
+from cudf.core.column.decimal import (
     Decimal32Column,
     Decimal64Column,
     Decimal128Column,
     DecimalBaseColumn,
 )
+from cudf.core.column.interval import IntervalColumn
+from cudf.core.column.lists import ListColumn
+from cudf.core.column.numerical import NumericalColumn
+from cudf.core.column.string import StringColumn
+from cudf.core.column.struct import StructColumn
+from cudf.core.column.timedelta import TimeDeltaColumn
diff --git a/python/cudf/cudf/core/column/decimal.py b/python/cudf/cudf/core/column/decimal.py
@@ -435,7 +435,7 @@ def _get_decimal_type(
     `op` for the given dtypes.
 
     For precision & scale calculations see : https://docs.microsoft.com/en-us/sql/t-sql/data-types/precision-scale-and-length-transact-sql
-    """  # noqa: E501
+    """
 
     # This should at some point be hooked up to libcudf's
     # binary_operation_fixed_point_scale
@@ -506,8 +506,8 @@ def _get_decimal_type(
     # if we've reached this point, we cannot create a decimal type without
     # overflow; raise an informative error
     raise ValueError(
-        f"Performing {op} between columns of type {repr(lhs_dtype)} and "
-        f"{repr(rhs_dtype)} would result in overflow"
+        f"Performing {op} between columns of type {lhs_dtype!r} and "
+        f"{rhs_dtype!r} would result in overflow"
     )
 
 

diff --git a/python/cudf/cudf/core/column/numerical.py b/python/cudf/cudf/core/column/numerical.py
@@ -226,7 +226,7 @@ def _binaryop(self, other: ColumnBinaryOperand, op: str) -> ColumnBase:
             # If `other` is a Python integer and it is out-of-bounds
             # promotion could fail but we can trivially define the result
             # in terms of `notnull` or `NULL_NOT_EQUALS`.
-            if type(other) is int and self.dtype.kind in "iu":  # noqa: E721
+            if type(other) is int and self.dtype.kind in "iu":
                 truthiness = None
                 iinfo = np.iinfo(self.dtype)
                 if iinfo.min > other:

diff --git a/python/cudf/cudf/core/column/string.py b/python/cudf/cudf/core/column/string.py
@@ -548,7 +548,7 @@ def join(
         2    <NA>
         3     c-d
         dtype: object
-        """  # noqa E501
+        """
         if sep is None:
             sep = ""
 
@@ -694,7 +694,7 @@ def extract(
 
             The `flags` parameter currently only supports re.DOTALL and
             re.MULTILINE.
-        """  # noqa W605
+        """
         if not _is_supported_regex_flags(flags):
             raise NotImplementedError(
                 "unsupported value for `flags` parameter"
@@ -830,7 +830,7 @@ def contains(
             value is set.
             The `flags` parameter currently only supports re.DOTALL and
             re.MULTILINE.
-        """  # noqa W605
+        """
         if na is not np.nan:
             raise NotImplementedError("`na` parameter is not yet supported")
         if regex and isinstance(pat, re.Pattern):
@@ -3675,7 +3675,7 @@ def count(self, pat: str, flags: int = 0) -> SeriesOrIndex:
             -   Some characters need to be escaped when passing
                 in pat. e.g. ``'$'`` has a special meaning in regex
                 and must be escaped when finding this literal character.
-        """  # noqa W605
+        """
         if isinstance(pat, re.Pattern):
             flags = pat.flags & ~re.U
             pat = pat.pattern

diff --git a/python/cudf/cudf/core/column/timedelta.py b/python/cudf/cudf/core/column/timedelta.py
@@ -468,7 +468,7 @@ def components(self) -> dict[str, ColumnBase]:
         2  13000     10       12       48           712             0            0
         3      0      0       35       35           656             0            0
         4     37     13       12       14           234             0            0
-        """  # noqa: E501
+        """
 
         date_meta = {
             "seconds": ["m", "s"],

diff --git a/python/cudf/cudf/core/column_accessor.py b/python/cudf/cudf/core/column_accessor.py
@@ -49,7 +49,7 @@ def from_zip(cls, data: abc.Iterator):
     def __getitem__(self, key):
         """Recursively apply dict.__getitem__ for nested elements."""
         # As described in the pandas docs
-        # https://pandas.pydata.org/pandas-docs/stable/user_guide/advanced.html#advanced-indexing-with-hierarchical-index  # noqa: E501
+        # https://pandas.pydata.org/pandas-docs/stable/user_guide/advanced.html#advanced-indexing-with-hierarchical-index
         # accessing nested elements of a multiindex must be done using a tuple.
         # Lists and other sequences are treated as accessing multiple elements
         # at the top level of the index.
@@ -62,10 +62,10 @@ def _to_flat_dict_inner(d: dict, parents: tuple = ()):
     for k, v in d.items():
         if not isinstance(v, d.__class__):
             if parents:
-                k = parents + (k,)
+                k = (*parents, k)
             yield (k, v)
         else:
-            yield from _to_flat_dict_inner(d=v, parents=parents + (k,))
+            yield from _to_flat_dict_inner(d=v, parents=(*parents, k))
 
 
 class ColumnAccessor(abc.MutableMapping):

diff --git a/python/cudf/cudf/core/cut.py b/python/cudf/cudf/core/cut.py
@@ -149,7 +149,7 @@ def cut(
         if len(set(bins)) is not len(bins):
             if duplicates == "raise":
                 raise ValueError(
-                    f"Bin edges must be unique: {repr(bins)}.\n"
+                    f"Bin edges must be unique: {bins!r}.\n"
                     f"You can drop duplicate edges by setting the 'duplicates'"
                     "kwarg"
                 )

diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
@@ -2248,7 +2248,7 @@ def from_dict(
         n1 n2
         a  b   1  3
            c   2  4
-        """  # noqa: E501
+        """
 
         orient = orient.lower()
         if orient == "index":
@@ -2405,7 +2405,7 @@ def to_dict(
         >>> df.to_dict('records', into=dd)
         [defaultdict(<class 'list'>, {'col1': 1, 'col2': 0.5}),
          defaultdict(<class 'list'>, {'col1': 2, 'col2': 0.75})]
-        """  # noqa: E501
+        """
         orient = orient.lower()
 
         if orient == "series":
@@ -3033,7 +3033,7 @@ def set_index(
         if len(keys) == 0:
             raise ValueError("No valid columns to be added to index.")
         if append:
-            keys = [self.index] + keys
+            keys = [self.index, *keys]
 
         # Preliminary type check
         labels_not_found = []
@@ -3099,7 +3099,7 @@ def set_index(
     @_performance_tracking
     def fillna(
         self, value=None, method=None, axis=None, inplace=False, limit=None
-    ):  # noqa: D102
+    ):
         if isinstance(value, (pd.Series, pd.DataFrame)):
             value = cudf.from_pandas(value)
         if isinstance(value, cudf.Series):
@@ -3580,7 +3580,7 @@ def drop_duplicates(
         1  Yum Yum   cup     4.0
         2  Indomie   cup     3.5
         4  Indomie  pack     5.0
-        """  # noqa: E501
+        """
         outdf = super().drop_duplicates(
             subset=subset,
             keep=keep,
@@ -4860,7 +4860,7 @@ def map(
 
         if na_action not in {"ignore", None}:
             raise ValueError(
-                f"na_action must be 'ignore' or None. Got {repr(na_action)}"
+                f"na_action must be 'ignore' or None. Got {na_action!r}"
             )
 
         if na_action == "ignore":
@@ -6145,7 +6145,7 @@ def quantile(
             non-numeric types and result is expected to be a Series in case of
             Pandas. cuDF will return a DataFrame as it doesn't support mixed
             types under Series.
-        """  # noqa: E501
+        """
         if axis not in (0, None):
             raise NotImplementedError("axis is not implemented yet")
 
@@ -6844,7 +6844,7 @@ def select_dtypes(self, include=None, exclude=None):
         3  False  2.0
         4   True  1.0
         5  False  2.0
-        """  # noqa: E501
+        """
 
         # code modified from:
         # https://github.com/pandas-dev/pandas/blob/master/pandas/core/frame.py#L3196