From 1e0d4ae77942eb9f40bcd1139673ca5466898d20 Mon Sep 17 00:00:00 2001
From: FBruzzesi <francesco.bruzzesi.93@gmail.com>
Date: Wed, 6 Nov 2024 16:01:08 +0100
Subject: [PATCH 01/15] WIP: add rank to Expr and Series (pandas-like and pyarrow backends)

---
 docs/api-reference/expr.md      |  1 +
 docs/api-reference/series.md    |  1 +
 narwhals/_arrow/expr.py         | 10 ++++
 narwhals/_arrow/series.py       | 19 +++++++
 narwhals/_pandas_like/expr.py   | 10 ++++
 narwhals/_pandas_like/series.py | 14 +++++
 narwhals/expr.py                | 91 +++++++++++++++++++++++++++++++++
 narwhals/series.py              | 51 ++++++++++++++++++
 8 files changed, 197 insertions(+)

diff --git a/docs/api-reference/expr.md b/docs/api-reference/expr.md
index 7188b2c36c..1ee8cae4f9 100644
--- a/docs/api-reference/expr.md
+++ b/docs/api-reference/expr.md
@@ -36,6 +36,7 @@
         - over
         - pipe
         - quantile
+        - rank
         - round
         - sample
         - shift
diff --git a/docs/api-reference/series.md b/docs/api-reference/series.md
index e8572dda8d..0dda8107c1 100644
--- a/docs/api-reference/series.md
+++ b/docs/api-reference/series.md
@@ -43,6 +43,7 @@
         - null_count
         - pipe
         - quantile
+        - rank
         - rename
         - round
         - sample
diff --git a/narwhals/_arrow/expr.py b/narwhals/_arrow/expr.py
index 35e936d72f..52b1c6875b 100644
--- a/narwhals/_arrow/expr.py
+++ b/narwhals/_arrow/expr.py
@@ -372,6 +372,16 @@ def func(df: ArrowDataFrame) -> list[ArrowSeries]:
     def mode(self: Self) -> Self:
         return reuse_series_implementation(self, "mode")
 
+    def rank(
+        self: Self,
+        method: Literal["average", "min", "max", "dense", "ordinal"],
+        *,
+        descending: bool,
+    ) -> Self:
+        return reuse_series_implementation(
+            self, "rank", method=method, descending=descending
+        )
+
     @property
     def dt(self: Self) -> ArrowExprDateTimeNamespace:
         return ArrowExprDateTimeNamespace(self)
diff --git a/narwhals/_arrow/series.py b/narwhals/_arrow/series.py
index 70009df43c..3610f1f41c 100644
--- a/narwhals/_arrow/series.py
+++ b/narwhals/_arrow/series.py
@@ -721,6 +721,25 @@ def mode(self: Self) -> ArrowSeries:
             plx.col(col_token) == plx.col(col_token).max()
         )[self.name]
 
+    def rank(
+        self: Self,
+        method: Literal["average", "min", "max", "dense", "ordinal"],
+        *,
+        descending: bool,
+    ) -> Self:
+        import pyarrow as pa  # ignore-banned-import
+        import pyarrow.compute as pc  # ignore-banned-import
+
+        sort_keys = "descending" if descending else "ascending"
+        tiebreaker = "first" if method == "ordinal" else method
+        native_series = self._native_series
+        null_mask = pc.is_null(native_series)
+
+        rank = pc.rank(native_series, sort_keys=sort_keys, tiebreaker=tiebreaker)
+
+        result = pc.if_else(null_mask, pa.scalar(None), rank)
+        return self._from_native_series(result)
+
     def __iter__(self: Self) -> Iterator[Any]:
         yield from self._native_series.__iter__()
 
diff --git a/narwhals/_pandas_like/expr.py b/narwhals/_pandas_like/expr.py
index a58597eea8..bc87a38337 100644
--- a/narwhals/_pandas_like/expr.py
+++ b/narwhals/_pandas_like/expr.py
@@ -387,6 +387,16 @@ def gather_every(self: Self, n: int, offset: int = 0) -> Self:
     def mode(self: Self) -> Self:
         return reuse_series_implementation(self, "mode")
 
+    def rank(
+        self: Self,
+        method: Literal["average", "min", "max", "dense", "ordinal"],
+        *,
+        descending: bool,
+    ) -> Self:
+        return reuse_series_implementation(
+            self, "rank", method=method, descending=descending
+        )
+
     @property
     def str(self: Self) -> PandasLikeExprStringNamespace:
         return PandasLikeExprStringNamespace(self)
diff --git a/narwhals/_pandas_like/series.py b/narwhals/_pandas_like/series.py
index 078e857b9e..057237b73c 100644
--- a/narwhals/_pandas_like/series.py
+++ b/narwhals/_pandas_like/series.py
@@ -682,6 +682,20 @@ def mode(self: Self) -> Self:
     def __iter__(self: Self) -> Iterator[Any]:
         yield from self._native_series.__iter__()
 
+    def rank(
+        self: Self,
+        method: Literal["average", "min", "max", "dense", "ordinal"],
+        *,
+        descending: bool,
+    ) -> Self:
+        result = self._native_series.rank(
+            method="first" if method == "ordinal" else method,
+            na_option="keep",
+            ascending=not descending,
+            pct=False,
+        )
+        return self._from_native_series(result)
+
     @property
     def str(self) -> PandasLikeSeriesStringNamespace:
         return PandasLikeSeriesStringNamespace(self)
diff --git a/narwhals/expr.py b/narwhals/expr.py
index 2f986760c8..07e2fc92af 100644
--- a/narwhals/expr.py
+++ b/narwhals/expr.py
@@ -2310,6 +2310,97 @@ def mode(self: Self) -> Self:
         """
         return self.__class__(lambda plx: self._call(plx).mode())
 
+    def rank(
+        self: Self,
+        method: Literal["average", "min", "max", "dense", "ordinal"] = "average",
+        *,
+        descending: bool = False,
+    ) -> Self:
+        """
+        Assign ranks to data, dealing with ties appropriately.
+
+        Arguments:
+            method: The method used to assign ranks to tied elements.
+                The following methods are available (default is 'average'):
+
+                - 'average' : The average of the ranks that would have been assigned to
+                  all the tied values is assigned to each value.
+                - 'min' : The minimum of the ranks that would have been assigned to all
+                    the tied values is assigned to each value. (This is also referred to
+                    as "competition" ranking.)
+                - 'max' : The maximum of the ranks that would have been assigned to all
+                    the tied values is assigned to each value.
+                - 'dense' : Like 'min', but the rank of the next highest element is
+                   assigned the rank immediately after those assigned to the tied
+                   elements.
+                - 'ordinal' : All values are given a distinct rank, corresponding to the
+                    order that the values occur in the Series.
+
+            descending: Rank in descending order.
+
+        Examples
+        --------
+        The 'average' method:
+
+        >>> df = pl.DataFrame({"a": [3, 6, 1, 1, 6]})
+        >>> df.select(pl.col("a").rank())
+        shape: (5, 1)
+        ┌─────┐
+        │ a   │
+        │ --- │
+        │ f64 │
+        ╞═════╡
+        │ 3.0 │
+        │ 4.5 │
+        │ 1.5 │
+        │ 1.5 │
+        │ 4.5 │
+        └─────┘
+
+        The 'ordinal' method:
+
+        >>> df = pl.DataFrame({"a": [3, 6, 1, 1, 6]})
+        >>> df.select(pl.col("a").rank("ordinal"))
+        shape: (5, 1)
+        ┌─────┐
+        │ a   │
+        │ --- │
+        │ u32 │
+        ╞═════╡
+        │ 3   │
+        │ 4   │
+        │ 1   │
+        │ 2   │
+        │ 5   │
+        └─────┘
+
+        Use 'rank' with 'over' to rank within groups:
+
+        >>> df = pl.DataFrame({"a": [1, 1, 2, 2, 2], "b": [6, 7, 5, 14, 11]})
+        >>> df.with_columns(pl.col("b").rank().over("a").alias("rank"))
+        shape: (5, 3)
+        ┌─────┬─────┬──────┐
+        │ a   ┆ b   ┆ rank │
+        │ --- ┆ --- ┆ ---  │
+        │ i64 ┆ i64 ┆ f64  │
+        ╞═════╪═════╪══════╡
+        │ 1   ┆ 6   ┆ 1.0  │
+        │ 1   ┆ 7   ┆ 2.0  │
+        │ 2   ┆ 5   ┆ 1.0  │
+        │ 2   ┆ 14  ┆ 3.0  │
+        │ 2   ┆ 11  ┆ 2.0  │
+        └─────┴─────┴──────┘
+        """
+
+        supported_rank_methods = {"average", "min", "max", "dense"}
+        if method not in supported_rank_methods:
+            msg = f"Ranking method must be one of {supported_rank_methods}. Found '{method}'"
+            raise ValueError(msg)
+
+        return self.__class__(
+            lambda plx: self._call(plx).rank(method=method, descending=descending)
+        )
+
     @property
     def str(self: Self) -> ExprStringNamespace[Self]:
         return ExprStringNamespace(self)
diff --git a/narwhals/series.py b/narwhals/series.py
index dac5c6d795..868a9eb5c0 100644
--- a/narwhals/series.py
+++ b/narwhals/series.py
@@ -2525,6 +2525,57 @@ def mode(self: Self) -> Self:
     def __iter__(self: Self) -> Iterator[Any]:
         yield from self._compliant_series.__iter__()
 
+    def rank(
+        self: Self,
+        method: Literal["average", "min", "max", "dense", "ordinal"] = "average",
+        *,
+        descending: bool = False,
+    ) -> Self:
+        """
+        Assign ranks to data, dealing with ties appropriately.
+
+        Arguments:
+            method: The method used to assign ranks to tied elements.
+                The following methods are available (default is 'average'):
+
+                - 'average' : The average of the ranks that would have been assigned to
+                  all the tied values is assigned to each value.
+                - 'min' : The minimum of the ranks that would have been assigned to all
+                    the tied values is assigned to each value. (This is also referred to
+                    as "competition" ranking.)
+                - 'max' : The maximum of the ranks that would have been assigned to all
+                    the tied values is assigned to each value.
+                - 'dense' : Like 'min', but the rank of the next highest element is
+                   assigned the rank immediately after those assigned to the tied
+                   elements.
+                - 'ordinal' : All values are given a distinct rank, corresponding to the
+                    order that the values occur in the Series.
+
+            descending: Rank in descending order.
+
+        Examples:
+
+        >>> s = pl.Series("a", [3, 6, 1, 1, 6])
+        >>> s.rank()
+        shape: (5,)
+        Series: 'a' [f64]
+        [
+            3.0
+            4.5
+            1.5
+            1.5
+            4.5
+        ]
+        """
+        supported_rank_methods = {"average", "min", "max", "dense"}
+        if method not in supported_rank_methods:
+            msg = f"Ranking method must be one of {supported_rank_methods}. Found '{method}'"
+            raise ValueError(msg)
+
+        return self._from_compliant_series(
+            self._compliant_series.rank(method=method, descending=descending)
+        )
+
     @property
     def str(self: Self) -> SeriesStringNamespace[Self]:
         return SeriesStringNamespace(self)

From ebf4321a0c4f98a76f02fde380575f95080ea639 Mon Sep 17 00:00:00 2001
From: FBruzzesi <francesco.bruzzesi.93@gmail.com>
Date: Thu, 7 Nov 2024 15:07:31 +0100
Subject: [PATCH 02/15] WIP: skip pyarrow rank computation for method="average"

---
 narwhals/_arrow/series.py | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/narwhals/_arrow/series.py b/narwhals/_arrow/series.py
index 3610f1f41c..48b90c118b 100644
--- a/narwhals/_arrow/series.py
+++ b/narwhals/_arrow/series.py
@@ -730,15 +730,18 @@ def rank(
         import pyarrow as pa  # ignore-banned-import
         import pyarrow.compute as pc  # ignore-banned-import
 
-        sort_keys = "descending" if descending else "ascending"
-        tiebreaker = "first" if method == "ordinal" else method
-        native_series = self._native_series
-        null_mask = pc.is_null(native_series)
+        if method != "average":
+            sort_keys = "descending" if descending else "ascending"
+            tiebreaker = "first" if method == "ordinal" else method
+            native_series = self._native_series
+            null_mask = pc.is_null(native_series)
 
-        rank = pc.rank(native_series, sort_keys=sort_keys, tiebreaker=tiebreaker)
+            rank = pc.rank(native_series, sort_keys=sort_keys, tiebreaker=tiebreaker)
 
-        result = pc.if_else(null_mask, pa.scalar(None), rank)
-        return self._from_native_series(result)
+            result = pc.if_else(null_mask, pa.scalar(None), rank)
+            return self._from_native_series(result)
+        else:
+            pass 
 
     def __iter__(self: Self) -> Iterator[Any]:
         yield from self._native_series.__iter__()

From e60214d817c3afcfaf0e3f5f7d4266edd996eac3 Mon Sep 17 00:00:00 2001
From: FBruzzesi <francesco.bruzzesi.93@gmail.com>
Date: Fri, 8 Nov 2024 09:51:45 +0100
Subject: [PATCH 03/15] WIP: raise for pyarrow average rank, rewrite rank docstring examples, add rank tests

---
 narwhals/_arrow/series.py          | 25 +++++---
 narwhals/expr.py                   | 98 ++++++++++++++----------------
 narwhals/series.py                 | 64 ++++++++++++++-----
 tests/expr_and_series/rank_test.py | 98 ++++++++++++++++++++++++++++++
 4 files changed, 210 insertions(+), 75 deletions(-)
 create mode 100644 tests/expr_and_series/rank_test.py

diff --git a/narwhals/_arrow/series.py b/narwhals/_arrow/series.py
index 48b90c118b..3278920a56 100644
--- a/narwhals/_arrow/series.py
+++ b/narwhals/_arrow/series.py
@@ -727,21 +727,26 @@ def rank(
         *,
         descending: bool,
     ) -> Self:
+        if method == "average":
+            msg = (
+                "`rank` with `method='average' is not supported for pyarrow backend. "
+                "The available methods are {'min', 'max', 'dense', 'ordinal'}."
+            )
+            raise ValueError(msg)
+
         import pyarrow as pa  # ignore-banned-import
         import pyarrow.compute as pc  # ignore-banned-import
 
-        if method != "average":
-            sort_keys = "descending" if descending else "ascending"
-            tiebreaker = "first" if method == "ordinal" else method
-            native_series = self._native_series
-            null_mask = pc.is_null(native_series)
+        sort_keys = "descending" if descending else "ascending"
+        tiebreaker = "first" if method == "ordinal" else method
 
-            rank = pc.rank(native_series, sort_keys=sort_keys, tiebreaker=tiebreaker)
+        native_series = self._native_series
+        null_mask = pc.is_null(native_series)
 
-            result = pc.if_else(null_mask, pa.scalar(None), rank)
-            return self._from_native_series(result)
-        else:
-            pass 
+        rank = pc.rank(native_series, sort_keys=sort_keys, tiebreaker=tiebreaker)
+
+        result = pc.if_else(null_mask, pa.scalar(None), rank)
+        return self._from_native_series(result)
 
     def __iter__(self: Self) -> Iterator[Any]:
         yield from self._native_series.__iter__()
diff --git a/narwhals/expr.py b/narwhals/expr.py
index 07e2fc92af..c0c83714f0 100644
--- a/narwhals/expr.py
+++ b/narwhals/expr.py
@@ -2319,6 +2319,9 @@ def rank(
         """
         Assign ranks to data, dealing with ties appropriately.
 
+        Notes:
+            The resulting dtype may differ between backends.
+
         Arguments:
             method: The method used to assign ranks to tied elements.
                 The following methods are available (default is 'average'):
@@ -2338,61 +2341,54 @@ def rank(
 
             descending: Rank in descending order.
 
-        Examples
-        --------
-        The 'average' method:
+        Examples:
+            >>> import narwhals as nw
+            >>> import pandas as pd
+            >>> import polars as pl
+            >>> import pyarrow as pa
+            >>> data = {"a": [3, 6, 1, 1, 6]}
 
-        >>> df = pl.DataFrame({"a": [3, 6, 1, 1, 6]})
-        >>> df.select(pl.col("a").rank())
-        shape: (5, 1)
-        ┌─────┐
-        │ a   │
-        │ --- │
-        │ f64 │
-        ╞═════╡
-        │ 3.0 │
-        │ 4.5 │
-        │ 1.5 │
-        │ 1.5 │
-        │ 4.5 │
-        └─────┘
+            We define a dataframe-agnostic function that computes the dense rank for
+            the data:
 
-        The 'ordinal' method:
+            >>> @nw.narwhalify
+            ... def func(df):
+            ...     return df.with_columns(rnk=nw.col("a").rank(method="dense"))
 
-        >>> df = pl.DataFrame({"a": [3, 6, 1, 1, 6]})
-        >>> df.select(pl.col("a").rank("ordinal"))
-        shape: (5, 1)
-        ┌─────┐
-        │ a   │
-        │ --- │
-        │ u32 │
-        ╞═════╡
-        │ 3   │
-        │ 4   │
-        │ 1   │
-        │ 2   │
-        │ 5   │
-        └─────┘
+            We can then pass any supported library such as pandas, Polars, or PyArrow:
+
+            >>> func(pl.DataFrame(data))
+            shape: (5, 2)
+            ┌─────┬─────┐
+            │ a   ┆ rnk │
+            │ --- ┆ --- │
+            │ i64 ┆ u32 │
+            ╞═════╪═════╡
+            │ 3   ┆ 2   │
+            │ 6   ┆ 3   │
+            │ 1   ┆ 1   │
+            │ 1   ┆ 1   │
+            │ 6   ┆ 3   │
+            └─────┴─────┘
+
+            >>> func(pd.DataFrame(data))
+               a  rnk
+            0  3  2.0
+            1  6  3.0
+            2  1  1.0
+            3  1  1.0
+            4  6  3.0
+
+            >>> func(pa.table(data))
+            pyarrow.Table
+            a: int64
+            rnk: uint64
+            ----
+            a: [[3,6,1,1,6]]
+            rnk: [[2,3,1,1,3]]
+        """
 
-        Use 'rank' with 'over' to rank within groups:
-
-        >>> df = pl.DataFrame({"a": [1, 1, 2, 2, 2], "b": [6, 7, 5, 14, 11]})
-        >>> df.with_columns(pl.col("b").rank().over("a").alias("rank"))
-        shape: (5, 3)
-        ┌─────┬─────┬──────┐
-        │ a   ┆ b   ┆ rank │
-        │ --- ┆ --- ┆ ---  │
-        │ i64 ┆ i64 ┆ f64  │
-        ╞═════╪═════╪══════╡
-        │ 1   ┆ 6   ┆ 1.0  │
-        │ 1   ┆ 7   ┆ 2.0  │
-        │ 2   ┆ 5   ┆ 1.0  │
-        │ 2   ┆ 14  ┆ 3.0  │
-        │ 2   ┆ 11  ┆ 2.0  │
-        └─────┴─────┴──────┘
-        """
-
-        supported_rank_methods = {"average", "min", "max", "dense"}
+        supported_rank_methods = {"average", "min", "max", "dense", "ordinal"}
         if method not in supported_rank_methods:
             msg = f"Ranking method must be one of {supported_rank_methods}. Found '{method}'"
             raise ValueError(msg)
diff --git a/narwhals/series.py b/narwhals/series.py
index 868a9eb5c0..7aa921eec9 100644
--- a/narwhals/series.py
+++ b/narwhals/series.py
@@ -2534,6 +2534,9 @@ def rank(
         """
         Assign ranks to data, dealing with ties appropriately.
 
+        Notes:
+            The resulting dtype may differ between backends.
+
         Arguments:
             method: The method used to assign ranks to tied elements.
                 The following methods are available (default is 'average'):
@@ -2554,20 +2557,53 @@ def rank(
             descending: Rank in descending order.
 
         Examples:
+            >>> import narwhals as nw
+            >>> import pandas as pd
+            >>> import polars as pl
+            >>> import pyarrow as pa
+            >>> data = [3, 6, 1, 1, 6]
+
+            We define a dataframe-agnostic function that computes the dense rank for
+            the data:
+
+            >>> @nw.narwhalify
+            ... def func(s):
+            ...     return s.rank(method="dense")
 
-        >>> s = pl.Series("a", [3, 6, 1, 1, 6])
-        >>> s.rank()
-        shape: (5,)
-        Series: 'a' [f64]
-        [
-            3.0
-            4.5
-            1.5
-            1.5
-            4.5
-        ]
-        """
-        supported_rank_methods = {"average", "min", "max", "dense"}
+            We can then pass any supported library such as pandas, Polars, or PyArrow:
+
+            >>> func(pl.Series(data))  # doctest:+NORMALIZE_WHITESPACE
+            shape: (5,)
+            Series: '' [u32]
+            [
+               2
+               3
+               1
+               1
+               3
+            ]
+
+            >>> func(pd.Series(data))
+            0    2.0
+            1    3.0
+            2    1.0
+            3    1.0
+            4    3.0
+            dtype: float64
+
+            >>> func(pa.chunked_array([data]))  # doctest:+ELLIPSIS
+            <pyarrow.lib.ChunkedArray object at ...>
+            [
+              [
+                2,
+                3,
+                1,
+                1,
+                3
+              ]
+            ]
+        """
+        supported_rank_methods = {"average", "min", "max", "dense", "ordinal"}
         if method not in supported_rank_methods:
             msg = f"Ranking method must be one of {supported_rank_methods}. Found '{method}'"
             raise ValueError(msg)
@@ -3220,7 +3256,7 @@ def to_datetime(self: Self, format: str | None = None) -> T:  # noqa: A002
             ... def func(s):
             ...     return s.str.to_datetime(format="%Y-%m-%d")
 
-            We can then pass any supported library such as pandas, Polars, or PyArrow::
+            We can then pass any supported library such as pandas, Polars, or PyArrow:
 
             >>> func(s_pd)
             0   2020-01-01
diff --git a/tests/expr_and_series/rank_test.py b/tests/expr_and_series/rank_test.py
new file mode 100644
index 0000000000..090605bf48
--- /dev/null
+++ b/tests/expr_and_series/rank_test.py
@@ -0,0 +1,98 @@
+from __future__ import annotations
+
+from contextlib import nullcontext as does_not_raise
+from typing import Literal
+
+import pytest
+
+import narwhals.stable.v1 as nw
+from tests.utils import Constructor
+from tests.utils import ConstructorEager
+from tests.utils import assert_equal_data
+
+rank_methods = ["average", "min", "max", "dense", "ordinal"]
+
+data = {"a": [3, 6, 1, 1, None, 6], "b": [1, 1, 2, 1, 2, 2]}
+
+expected = {
+    "average": [3.0, 4.5, 1.5, 1.5, float("nan"), 4.5],
+    "min": [3, 4, 1, 1, float("nan"), 4],
+    "max": [3, 5, 2, 2, float("nan"), 5],
+    "dense": [2, 3, 1, 1, float("nan"), 3],
+    "ordinal": [3, 4, 1, 2, float("nan"), 5],
+}
+
+expected_over = {
+    "average": [2.0, 3.0, 1.0, 1.0, float("nan"), 2.0],
+    "min": [2, 3, 1, 1, float("nan"), 2],
+    "max": [2, 3, 1, 1, float("nan"), 2],
+    "dense": [2, 3, 1, 1, float("nan"), 2],
+    "ordinal": [2, 3, 1, 1, float("nan"), 2],
+}
+
+
+@pytest.mark.parametrize("method", rank_methods)
+def test_rank_expr(
+    request: pytest.FixtureRequest,
+    constructor: Constructor,
+    method: Literal["average", "min", "max", "dense", "ordinal"],
+) -> None:
+    if "dask" in str(constructor):
+        request.applymarker(pytest.mark.xfail)
+
+    context = (
+        pytest.raises(
+            ValueError,
+            match=r"`rank` with `method='average' is not supported for pyarrow backend.",
+        )
+        if "pyarrow_table" in str(constructor) and method == "average"
+        else does_not_raise()
+    )
+
+    with context:
+        df = nw.from_native(constructor(data))
+
+        result = df.select(nw.col("a").rank(method=method))
+        expected_data = {"a": expected[method]}
+        assert_equal_data(result, expected_data)
+
+
+@pytest.mark.parametrize("method", rank_methods)
+def test_rank_series(
+    constructor_eager: ConstructorEager,
+    method: Literal["average", "min", "max", "dense", "ordinal"],
+) -> None:
+    context = (
+        pytest.raises(
+            ValueError,
+            match=r"`rank` with `method='average' is not supported for pyarrow backend.",
+        )
+        if "pyarrow_table" in str(constructor_eager) and method == "average"
+        else does_not_raise()
+    )
+
+    with context:
+        df = nw.from_native(constructor_eager(data), eager_only=True)
+
+        result = {"a": df["a"].rank(method=method)}
+        expected_data = {"a": expected[method]}
+        assert_equal_data(result, expected_data)
+
+
+@pytest.mark.parametrize("method", rank_methods)
+def test_rank_expr_in_over_context(
+    request: pytest.FixtureRequest,
+    constructor: Constructor,
+    method: Literal["average", "min", "max", "dense", "ordinal"],
+) -> None:
+    if "pyarrow_table" in str(constructor) or "dask" in str(constructor):
+        # Pyarrow raises:
+        # > pyarrow.lib.ArrowKeyError: No function registered with name: hash_rank
+        # We can handle that to provide a better error message.
+        request.applymarker(pytest.mark.xfail)
+
+    df = nw.from_native(constructor(data))
+
+    result = df.select(nw.col("a").rank(method=method).over("b"))
+    expected_data = {"a": expected_over[method]}
+    assert_equal_data(result, expected_data)

From cbc13b55b456d37e3294c74f67e20a9e3ff2697e Mon Sep 17 00:00:00 2001
From: FBruzzesi <francesco.bruzzesi.93@gmail.com>
Date: Sat, 9 Nov 2024 19:46:06 +0100
Subject: [PATCH 04/15] pandas int workaround

---
 narwhals/_pandas_like/series.py    | 50 +++++++++++++++++++++++++-----
 tests/expr_and_series/rank_test.py | 11 +++++--
 2 files changed, 51 insertions(+), 10 deletions(-)

diff --git a/narwhals/_pandas_like/series.py b/narwhals/_pandas_like/series.py
index 614b4a7951..97c7720c32 100644
--- a/narwhals/_pandas_like/series.py
+++ b/narwhals/_pandas_like/series.py
@@ -725,13 +725,49 @@ def rank(
         *,
         descending: bool,
     ) -> Self:
-        result = self._native_series.rank(
-            method="first" if method == "ordinal" else method,
-            na_option="keep",
-            ascending=not descending,
-            pct=False,
-        )
-        return self._from_native_series(result)
+        pd_method = "first" if method == "ordinal" else method
+        native_series = self._native_series
+
+        if (
+            self._implementation is Implementation.PANDAS
+            and self._backend_version < (3,)
+            and self.dtype
+            in {
+                self._dtypes.Int64,
+                self._dtypes.Int32,
+                self._dtypes.Int16,
+                self._dtypes.Int8,
+                self._dtypes.UInt64,
+                self._dtypes.UInt32,
+                self._dtypes.UInt16,
+                self._dtypes.UInt8,
+            }
+            and (null_mask := native_series.isna()).any()
+        ):
+            # crazy workaround for the case of `na_option="keep"` and nullable
+            # integer dtypes. This should be supported in pandas > 3.0
+            # https://github.com/pandas-dev/pandas/issues/56976
+            ranked_series = (
+                native_series.to_frame()
+                .assign(**{f"{native_series.name}_is_null": null_mask})
+                .groupby(f"{native_series.name}_is_null")
+                .rank(
+                    method=pd_method,
+                    na_option="keep",
+                    ascending=not descending,
+                    pct=False,
+                )[native_series.name]
+            )
+
+        else:
+            ranked_series = native_series.rank(
+                method=pd_method,
+                na_option="keep",
+                ascending=not descending,
+                pct=False,
+            )
+
+        return self._from_native_series(ranked_series)
 
     @property
     def str(self) -> PandasLikeSeriesStringNamespace:
diff --git a/tests/expr_and_series/rank_test.py b/tests/expr_and_series/rank_test.py
index 090605bf48..695e4a37a5 100644
--- a/tests/expr_and_series/rank_test.py
+++ b/tests/expr_and_series/rank_test.py
@@ -12,7 +12,8 @@
 
 rank_methods = ["average", "min", "max", "dense", "ordinal"]
 
-data = {"a": [3, 6, 1, 1, None, 6], "b": [1, 1, 2, 1, 2, 2]}
+data_int = {"a": [3, 6, 1, 1, None, 6], "b": [1, 1, 2, 1, 2, 2]}
+data_float = {"a": [3.1, 6.1, 1.5, 1.5, None, 6.1], "b": [1, 1, 2, 1, 2, 2]}
 
 expected = {
     "average": [3.0, 4.5, 1.5, 1.5, float("nan"), 4.5],
@@ -32,10 +33,12 @@
 
 
 @pytest.mark.parametrize("method", rank_methods)
+@pytest.mark.parametrize("data", [data_int, data_float])
 def test_rank_expr(
     request: pytest.FixtureRequest,
     constructor: Constructor,
     method: Literal["average", "min", "max", "dense", "ordinal"],
+    data: dict[str, float],
 ) -> None:
     if "dask" in str(constructor):
         request.applymarker(pytest.mark.xfail)
@@ -58,9 +61,11 @@ def test_rank_expr(
 
 
 @pytest.mark.parametrize("method", rank_methods)
+@pytest.mark.parametrize("data", [data_int, data_float])
 def test_rank_series(
     constructor_eager: ConstructorEager,
     method: Literal["average", "min", "max", "dense", "ordinal"],
+    data: dict[str, float],
 ) -> None:
     context = (
         pytest.raises(
@@ -85,13 +90,13 @@ def test_rank_expr_in_over_context(
     constructor: Constructor,
     method: Literal["average", "min", "max", "dense", "ordinal"],
 ) -> None:
-    if "pyarrow_table" in str(constructor) or "dask" in str(constructor):
+    if "polars" not in str(constructor):
         # Pyarrow raises:
         # > pyarrow.lib.ArrowKeyError: No function registered with name: hash_rank
         # We can handle that to provide a better error message.
         request.applymarker(pytest.mark.xfail)
 
-    df = nw.from_native(constructor(data))
+    df = nw.from_native(constructor(data_int))
 
     result = df.select(nw.col("a").rank(method=method).over("b"))
     expected_data = {"a": expected_over[method]}

From 8b492d59b761d1a19d2f34fb5afbf16fdccface8 Mon Sep 17 00:00:00 2001
From: FBruzzesi <francesco.bruzzesi.93@gmail.com>
Date: Sat, 9 Nov 2024 20:51:37 +0100
Subject: [PATCH 05/15] add missing comma in replace_strict docstring example

---
 narwhals/expr.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/narwhals/expr.py b/narwhals/expr.py
index bd38d63942..823f985fe0 100644
--- a/narwhals/expr.py
+++ b/narwhals/expr.py
@@ -1003,7 +1003,8 @@ def replace_strict(
             ... def func(df):
             ...     return df.with_columns(
             ...         b=nw.col("a").replace_strict(
-            ...             [0,1,2,3], ['zero', 'one', 'two', 'three']
+            ...             [0, 1, 2, 3],
+            ...             ["zero", "one", "two", "three"],
             ...             return_dtype=nw.String,
             ...         )
             ...     )

From 4c8cc1b9090fc315126a90b292b6ed17ac51deb1 Mon Sep 17 00:00:00 2001
From: FBruzzesi <francesco.bruzzesi.93@gmail.com>
Date: Sun, 10 Nov 2024 11:07:55 +0100
Subject: [PATCH 06/15] merge main, test invalid method

---
 narwhals/expr.py                   |  5 ++++-
 narwhals/series.py                 |  5 ++++-
 tests/expr_and_series/rank_test.py | 16 ++++++++++++++++
 3 files changed, 24 insertions(+), 2 deletions(-)

diff --git a/narwhals/expr.py b/narwhals/expr.py
index 089e56fcfe..8c6fc8b276 100644
--- a/narwhals/expr.py
+++ b/narwhals/expr.py
@@ -2528,7 +2528,10 @@ def rank(
 
         supported_rank_methods = {"average", "min", "max", "dense", "ordinal"}
         if method not in supported_rank_methods:
-            msg = f"Ranking method must be one of {supported_rank_methods}. Found '{method}'"
+            msg = (
+                "Ranking method must be one of {'average', 'min', 'max', 'dense', 'ordinal'}. "
+                f"Found '{method}'"
+            )
             raise ValueError(msg)
 
         return self.__class__(
diff --git a/narwhals/series.py b/narwhals/series.py
index b49e2d0931..5f5a27c9c6 100644
--- a/narwhals/series.py
+++ b/narwhals/series.py
@@ -2731,7 +2731,10 @@ def rank(
         """
         supported_rank_methods = {"average", "min", "max", "dense", "ordinal"}
         if method not in supported_rank_methods:
-            msg = f"Ranking method must be one of {supported_rank_methods}. Found '{method}'"
+            msg = (
+                "Ranking method must be one of {'average', 'min', 'max', 'dense', 'ordinal'}. "
+                f"Found '{method}'"
+            )
             raise ValueError(msg)
 
         return self._from_compliant_series(
diff --git a/tests/expr_and_series/rank_test.py b/tests/expr_and_series/rank_test.py
index 695e4a37a5..259bfbd5dc 100644
--- a/tests/expr_and_series/rank_test.py
+++ b/tests/expr_and_series/rank_test.py
@@ -101,3 +101,19 @@ def test_rank_expr_in_over_context(
     result = df.select(nw.col("a").rank(method=method).over("b"))
     expected_data = {"a": expected_over[method]}
     assert_equal_data(result, expected_data)
+
+
+def test_invalid_method_raise(constructor: Constructor) -> None:
+    method = "invalid_method_name"
+    df = nw.from_native(constructor(data_float))
+
+    msg = (
+        "Ranking method must be one of {'average', 'min', 'max', 'dense', 'ordinal'}. "
+        f"Found '{method}'"
+    )
+
+    with pytest.raises(ValueError, match=msg):
+        df.select(nw.col("a").rank(method=method))  # type: ignore[arg-type]
+
+    with pytest.raises(ValueError, match=msg):
+        df.lazy().collect()["a"].rank(method=method)  # type: ignore[arg-type]

From ec0f8a7ccf72bdb1b63b112e545b9c1fe0dc5a50 Mon Sep 17 00:00:00 2001
From: FBruzzesi <francesco.bruzzesi.93@gmail.com>
Date: Sun, 10 Nov 2024 11:20:37 +0100
Subject: [PATCH 07/15] old pyarrow

---
 narwhals/_arrow/series.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/narwhals/_arrow/series.py b/narwhals/_arrow/series.py
index fc095715b6..dcbfa855ed 100644
--- a/narwhals/_arrow/series.py
+++ b/narwhals/_arrow/series.py
@@ -777,6 +777,9 @@ def rank(
         tiebreaker = "first" if method == "ordinal" else method
 
         native_series = self._native_series
+        if self._backend_version < (14, 0, 0):  # pragma: no cover
+            native_series = native_series.combine_chunks()
+
         null_mask = pc.is_null(native_series)
 
         rank = pc.rank(native_series, sort_keys=sort_keys, tiebreaker=tiebreaker)

From e8989e35bcd5a7358ed12914b3cc5455ee365893 Mon Sep 17 00:00:00 2001
From: FBruzzesi <francesco.bruzzesi.93@gmail.com>
Date: Wed, 18 Dec 2024 11:21:37 +0100
Subject: [PATCH 08/15] WIP

---
 narwhals/_pandas_like/expr.py | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/narwhals/_pandas_like/expr.py b/narwhals/_pandas_like/expr.py
index 5c1ab22024..eb9e4e56bd 100644
--- a/narwhals/_pandas_like/expr.py
+++ b/narwhals/_pandas_like/expr.py
@@ -33,6 +33,7 @@
     # Pandas cumcount counts nulls while Polars does not
     # So, instead of using "cumcount" we use "cumsum" on notna() to get the same result
     "col->cum_count": "cumsum",
+    "col->rank": "rank",
 }
 
 
@@ -411,7 +412,7 @@ def alias(self, name: str) -> Self:
             version=self._version,
         )
 
-    def over(self, keys: list[str]) -> Self:
+    def over(self: Self, keys: list[str]) -> Self:
         if self._function_name in CUMULATIVE_FUNCTIONS_TO_PANDAS_EQUIVALENT:
 
             def func(df: PandasLikeDataFrame) -> list[PandasLikeSeries]:
@@ -430,11 +431,12 @@ def func(df: PandasLikeDataFrame) -> list[PandasLikeSeries]:
                     plx = self.__narwhals_namespace__()
                     df = df.with_columns(~plx.col(*self._root_names).is_null())
 
-                res_native = df._native_frame.groupby(list(keys), as_index=False)[
-                    self._root_names
-                ].transform(
-                    CUMULATIVE_FUNCTIONS_TO_PANDAS_EQUIVALENT[self._function_name]
-                )
+                res_native = getattr(
+                    df._native_frame.groupby(list(keys), as_index=False)[
+                        self._root_names
+                    ],
+                    CUMULATIVE_FUNCTIONS_TO_PANDAS_EQUIVALENT[self._function_name],
+                )()
                 result_frame = df._from_native_frame(
                     rename(
                         res_native,

From 07571c89bc5f6dc43619567d00810c2cd4c6eaf5 Mon Sep 17 00:00:00 2001
From: FBruzzesi <francesco.bruzzesi.93@gmail.com>
Date: Thu, 26 Dec 2024 20:22:16 +0100
Subject: [PATCH 09/15] fail pandas_pyarrow for pandas < (2,1)

---
 tests/expr_and_series/rank_test.py | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/tests/expr_and_series/rank_test.py b/tests/expr_and_series/rank_test.py
index 9d3a60a162..e64d58574e 100644
--- a/tests/expr_and_series/rank_test.py
+++ b/tests/expr_and_series/rank_test.py
@@ -6,6 +6,7 @@
 import pytest
 
 import narwhals.stable.v1 as nw
+from tests.utils import PANDAS_VERSION
 from tests.utils import Constructor
 from tests.utils import ConstructorEager
 from tests.utils import assert_equal_data
@@ -40,7 +41,9 @@ def test_rank_expr(
     method: Literal["average", "min", "max", "dense", "ordinal"],
     data: dict[str, float],
 ) -> None:
-    if "dask" in str(constructor):
+    if "dask" in str(constructor) or (
+        "pandas_pyarrow" in str(constructor) and PANDAS_VERSION < (2, 1)
+    ):
         request.applymarker(pytest.mark.xfail)
 
     context = (
@@ -63,10 +66,14 @@ def test_rank_expr(
 @pytest.mark.parametrize("method", rank_methods)
 @pytest.mark.parametrize("data", [data_int, data_float])
 def test_rank_series(
+    request: pytest.FixtureRequest,
     constructor_eager: ConstructorEager,
     method: Literal["average", "min", "max", "dense", "ordinal"],
     data: dict[str, float],
 ) -> None:
+    if "pandas_pyarrow" in str(constructor_eager) and PANDAS_VERSION < (2, 1):
+        request.applymarker(pytest.mark.xfail)
+
     context = (
         pytest.raises(
             ValueError,
@@ -96,10 +103,10 @@ def test_rank_expr_in_over_context(
         # We can handle that to provide a better error message.
         request.applymarker(pytest.mark.xfail)
 
+    if "pandas_pyarrow" in str(constructor) and PANDAS_VERSION < (2, 1):
+        request.applymarker(pytest.mark.xfail)
+
     if method == "ordinal" and "polars" not in str(constructor):
-        # Pyarrow raises:
-        # > pyarrow.lib.ArrowKeyError: No function registered with name: hash_rank
-        # We can handle that to provide a better error message.
         request.applymarker(pytest.mark.xfail)
 
     df = nw.from_native(constructor(data_int))

From 96520ae6061c9b22e762f2d2a7b4d6f1031cfc9f Mon Sep 17 00:00:00 2001
From: FBruzzesi <francesco.bruzzesi.93@gmail.com>
Date: Fri, 27 Dec 2024 08:54:36 +0100
Subject: [PATCH 10/15] xfail int only

---
 tests/expr_and_series/rank_test.py | 19 ++++++++++++++-----
 1 file changed, 14 insertions(+), 5 deletions(-)

diff --git a/tests/expr_and_series/rank_test.py b/tests/expr_and_series/rank_test.py
index e64d58574e..db589e7e36 100644
--- a/tests/expr_and_series/rank_test.py
+++ b/tests/expr_and_series/rank_test.py
@@ -39,10 +39,15 @@ def test_rank_expr(
     request: pytest.FixtureRequest,
     constructor: Constructor,
     method: Literal["average", "min", "max", "dense", "ordinal"],
-    data: dict[str, float],
+    data: dict[str, list[float]],
 ) -> None:
-    if "dask" in str(constructor) or (
-        "pandas_pyarrow" in str(constructor) and PANDAS_VERSION < (2, 1)
+    if "dask" in str(constructor):
+        request.applymarker(pytest.mark.xfail)
+
+    if (
+        "pandas_pyarrow" in str(constructor)
+        and PANDAS_VERSION < (2, 1)
+        and isinstance(data["a"][0], int)
     ):
         request.applymarker(pytest.mark.xfail)
 
@@ -69,9 +74,13 @@ def test_rank_series(
     request: pytest.FixtureRequest,
     constructor_eager: ConstructorEager,
     method: Literal["average", "min", "max", "dense", "ordinal"],
-    data: dict[str, float],
+    data: dict[str, list[float]],
 ) -> None:
-    if "pandas_pyarrow" in str(constructor_eager) and PANDAS_VERSION < (2, 1):
+    if (
+        "pandas_pyarrow" in str(constructor_eager)
+        and PANDAS_VERSION < (2, 1)
+        and isinstance(data["a"][0], int)
+    ):
         request.applymarker(pytest.mark.xfail)
 
     context = (

From 6ad961e5efae8b96d944eeef8030a184dd243ee6 Mon Sep 17 00:00:00 2001
From: FBruzzesi <francesco.bruzzesi.93@gmail.com>
Date: Fri, 27 Dec 2024 08:58:03 +0100
Subject: [PATCH 11/15] fix options in over

---
 tests/expr_and_series/rank_test.py | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/tests/expr_and_series/rank_test.py b/tests/expr_and_series/rank_test.py
index db589e7e36..16f95a82ba 100644
--- a/tests/expr_and_series/rank_test.py
+++ b/tests/expr_and_series/rank_test.py
@@ -115,10 +115,7 @@ def test_rank_expr_in_over_context(
     if "pandas_pyarrow" in str(constructor) and PANDAS_VERSION < (2, 1):
         request.applymarker(pytest.mark.xfail)
 
-    if method == "ordinal" and "polars" not in str(constructor):
-        request.applymarker(pytest.mark.xfail)
-
-    df = nw.from_native(constructor(data_int))
+    df = nw.from_native(constructor(data_float))
 
     result = df.select(nw.col("a").rank(method=method).over("b"))
     expected_data = {"a": expected_over[method]}

From 5c565a4fe1cb4238ebd5cfeec47a153392dfce76 Mon Sep 17 00:00:00 2001
From: FBruzzesi <francesco.bruzzesi.93@gmail.com>
Date: Fri, 27 Dec 2024 09:00:41 +0100
Subject: [PATCH 12/15] forgot a file

---
 narwhals/_pandas_like/expr.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/narwhals/_pandas_like/expr.py b/narwhals/_pandas_like/expr.py
index 298e152d4c..e76c1940c2 100644
--- a/narwhals/_pandas_like/expr.py
+++ b/narwhals/_pandas_like/expr.py
@@ -448,9 +448,12 @@ def func(df: PandasLikeDataFrame) -> list[PandasLikeSeries]:
                 if self._function_name == "col->shift":
                     kwargs = {"periods": self._kwargs.get("n", 1)}
                 elif self._function_name == "col->rank":
+                    _method = self._kwargs.get("method", "average")
                     kwargs = {
-                        "method": self._kwargs.get("method", "average"),
+                        "method": "first" if _method == "ordinal" else _method,
                         "ascending": not self._kwargs.get("descending", False),
+                        "na_option": "keep",
+                        "pct": False,
                     }
                 else:  # Cumulative operation
                     kwargs = {"skipna": True}

From 1b550e42454e0c216ac0e2643dc1661b14725e47 Mon Sep 17 00:00:00 2001
From: FBruzzesi <francesco.bruzzesi.93@gmail.com>
Date: Mon, 30 Dec 2024 16:29:39 +0100
Subject: [PATCH 13/15] merge main and better return docstring

---
 narwhals/expr.py   | 2 +-
 narwhals/series.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/narwhals/expr.py b/narwhals/expr.py
index ea8e326a60..e8794c987c 100644
--- a/narwhals/expr.py
+++ b/narwhals/expr.py
@@ -3826,7 +3826,7 @@ def rank(
             descending: Rank in descending order.
 
         Returns:
-            A new expression.
+            A new expression with rank data.
 
         Examples:
             >>> import narwhals as nw
diff --git a/narwhals/series.py b/narwhals/series.py
index 8db9308c56..db7c0120f8 100644
--- a/narwhals/series.py
+++ b/narwhals/series.py
@@ -3977,7 +3977,7 @@ def rank(
             descending: Rank in descending order.
 
         Returns:
-            A new series
+            A new series with rank data as values.
 
         Examples:
             >>> import narwhals as nw

From b68f575d0884a659859a6dbaae7805dd913d7103 Mon Sep 17 00:00:00 2001
From: FBruzzesi <francesco.bruzzesi.93@gmail.com>
Date: Thu, 2 Jan 2025 14:47:59 +0100
Subject: [PATCH 14/15] float(nan) -> None

---
 tests/expr_and_series/rank_test.py | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/tests/expr_and_series/rank_test.py b/tests/expr_and_series/rank_test.py
index 16f95a82ba..b3de335d77 100644
--- a/tests/expr_and_series/rank_test.py
+++ b/tests/expr_and_series/rank_test.py
@@ -17,19 +17,19 @@
 data_float = {"a": [3.1, 6.1, 1.5, 1.5, None, 6.1], "b": [1, 1, 2, 1, 2, 2]}
 
 expected = {
-    "average": [3.0, 4.5, 1.5, 1.5, float("nan"), 4.5],
-    "min": [3, 4, 1, 1, float("nan"), 4],
-    "max": [3, 5, 2, 2, float("nan"), 5],
-    "dense": [2, 3, 1, 1, float("nan"), 3],
-    "ordinal": [3, 4, 1, 2, float("nan"), 5],
+    "average": [3.0, 4.5, 1.5, 1.5, None, 4.5],
+    "min": [3, 4, 1, 1, None, 4],
+    "max": [3, 5, 2, 2, None, 5],
+    "dense": [2, 3, 1, 1, None, 3],
+    "ordinal": [3, 4, 1, 2, None, 5],
 }
 
 expected_over = {
-    "average": [2.0, 3.0, 1.0, 1.0, float("nan"), 2.0],
-    "min": [2, 3, 1, 1, float("nan"), 2],
-    "max": [2, 3, 1, 1, float("nan"), 2],
-    "dense": [2, 3, 1, 1, float("nan"), 2],
-    "ordinal": [2, 3, 1, 1, float("nan"), 2],
+    "average": [2.0, 3.0, 1.0, 1.0, None, 2.0],
+    "min": [2, 3, 1, 1, None, 2],
+    "max": [2, 3, 1, 1, None, 2],
+    "dense": [2, 3, 1, 1, None, 2],
+    "ordinal": [2, 3, 1, 1, None, 2],
 }
 
 

From 6c72df7e423b33ac4f47e7d507a476c80f322bec Mon Sep 17 00:00:00 2001
From: FBruzzesi <francesco.bruzzesi.93@gmail.com>
Date: Tue, 7 Jan 2025 10:10:39 +0100
Subject: [PATCH 15/15] test eager only for rank

---
 tests/expr_and_series/rank_test.py | 24 ++++++++++--------------
 1 file changed, 10 insertions(+), 14 deletions(-)

diff --git a/tests/expr_and_series/rank_test.py b/tests/expr_and_series/rank_test.py
index b3de335d77..99a64371e6 100644
--- a/tests/expr_and_series/rank_test.py
+++ b/tests/expr_and_series/rank_test.py
@@ -7,7 +7,6 @@
 
 import narwhals.stable.v1 as nw
 from tests.utils import PANDAS_VERSION
-from tests.utils import Constructor
 from tests.utils import ConstructorEager
 from tests.utils import assert_equal_data
 
@@ -37,15 +36,12 @@
 @pytest.mark.parametrize("data", [data_int, data_float])
 def test_rank_expr(
     request: pytest.FixtureRequest,
-    constructor: Constructor,
+    constructor_eager: ConstructorEager,
     method: Literal["average", "min", "max", "dense", "ordinal"],
     data: dict[str, list[float]],
 ) -> None:
-    if "dask" in str(constructor):
-        request.applymarker(pytest.mark.xfail)
-
     if (
-        "pandas_pyarrow" in str(constructor)
+        "pandas_pyarrow" in str(constructor_eager)
         and PANDAS_VERSION < (2, 1)
         and isinstance(data["a"][0], int)
     ):
@@ -56,12 +52,12 @@ def test_rank_expr(
             ValueError,
             match=r"`rank` with `method='average' is not supported for pyarrow backend.",
         )
-        if "pyarrow_table" in str(constructor) and method == "average"
+        if "pyarrow_table" in str(constructor_eager) and method == "average"
         else does_not_raise()
     )
 
     with context:
-        df = nw.from_native(constructor(data))
+        df = nw.from_native(constructor_eager(data))
 
         result = df.select(nw.col("a").rank(method=method))
         expected_data = {"a": expected[method]}
@@ -103,28 +99,28 @@ def test_rank_series(
 @pytest.mark.parametrize("method", rank_methods)
 def test_rank_expr_in_over_context(
     request: pytest.FixtureRequest,
-    constructor: Constructor,
+    constructor_eager: ConstructorEager,
     method: Literal["average", "min", "max", "dense", "ordinal"],
 ) -> None:
-    if any(x in str(constructor) for x in ("pyarrow_table", "dask")):
+    if any(x in str(constructor_eager) for x in ("pyarrow_table", "dask")):
         # Pyarrow raises:
         # > pyarrow.lib.ArrowKeyError: No function registered with name: hash_rank
         # We can handle that to provide a better error message.
         request.applymarker(pytest.mark.xfail)
 
-    if "pandas_pyarrow" in str(constructor) and PANDAS_VERSION < (2, 1):
+    if "pandas_pyarrow" in str(constructor_eager) and PANDAS_VERSION < (2, 1):
         request.applymarker(pytest.mark.xfail)
 
-    df = nw.from_native(constructor(data_float))
+    df = nw.from_native(constructor_eager(data_float))
 
     result = df.select(nw.col("a").rank(method=method).over("b"))
     expected_data = {"a": expected_over[method]}
     assert_equal_data(result, expected_data)
 
 
-def test_invalid_method_raise(constructor: Constructor) -> None:
+def test_invalid_method_raise(constructor_eager: ConstructorEager) -> None:
     method = "invalid_method_name"
-    df = nw.from_native(constructor(data_float))
+    df = nw.from_native(constructor_eager(data_float))
 
     msg = (
         "Ranking method must be one of {'average', 'min', 'max', 'dense', 'ordinal'}. "