From fc649376221e52369c26f41b4427dd72b4debe32 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dea=20Mar=C3=ADa=20L=C3=A9on?= Date: Wed, 30 Oct 2024 18:41:03 +0100 Subject: [PATCH 01/25] wip --- docs/api-reference/series.md | 1 + mkdocs.yml | 9 +++++++++ narwhals/_pandas_like/series.py | 18 ++++++++++++++++++ narwhals/expr.py | 23 +++++++++++++++++++++++ narwhals/series.py | 32 ++++++++++++++++++++++++++++++++ 5 files changed, 83 insertions(+) diff --git a/docs/api-reference/series.md b/docs/api-reference/series.md index e8572dda8..cbf4837bb 100644 --- a/docs/api-reference/series.md +++ b/docs/api-reference/series.md @@ -19,6 +19,7 @@ - diff - drop_nulls - dtype + - ewm_mean - fill_null - filter - gather_every diff --git a/mkdocs.yml b/mkdocs.yml index 46cb5335f..79cd57b53 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -112,3 +112,12 @@ markdown_extensions: - pymdownx.emoji: emoji_index: !!python/name:material.extensions.emoji.twemoji emoji_generator: !!python/name:material.extensions.emoji.to_svg +- pymdownx.arithmatex: + generic: true +extra_javascript: + - javascripts/katex.js + - https://unpkg.com/katex@0/dist/katex.min.js + - https://unpkg.com/katex@0/dist/contrib/auto-render.min.js + +extra_css: + - https://unpkg.com/katex@0/dist/katex.min.css \ No newline at end of file diff --git a/narwhals/_pandas_like/series.py b/narwhals/_pandas_like/series.py index 35df78e2f..466ddc28d 100644 --- a/narwhals/_pandas_like/series.py +++ b/narwhals/_pandas_like/series.py @@ -174,6 +174,24 @@ def dtype(self: Self) -> DType: self._native_series, self._dtypes, self._implementation ) + def ewm_mean( + self, + *, + com: float | None = None, + span: float | None = None, + half_life: float | None = None, + alpha: float | None = None, + adjust: bool = True, + min_periods: int = 1, + ignore_nulls: bool = False, + ) -> PandasLikeSeries: + ser = self._native_series + result = ser.ewm( + com, span, half_life, alpha, min_periods, adjust, ignore_na=ignore_nulls + ).mean() + + return self._from_native_series(result) + def scatter(self, indices: int | Sequence[int], values: Any) -> Self: if isinstance(values, self.__class__): # .copy() is necessary in some pre-2.2 versions of pandas to avoid diff --git a/narwhals/expr.py b/narwhals/expr.py index 6c2d28962..3846897bd 100644 --- a/narwhals/expr.py +++ b/narwhals/expr.py @@ -404,6 +404,29 @@ def all(self) -> Self: """ return self.__class__(lambda plx: self._call(plx).all()) + def ewm_mean( + self: Self, + *, + com: float | None = None, + span: float | None = None, + half_life: float | None = None, + alpha: float | None = None, + adjust: bool = True, + min_periods: int = 1, + ignore_nulls: bool = False, + ) -> Self: + return self.__class__( + lambda plx: self._call(plx).ewm_mean( + com=com, + span=span, + half_life=half_life, + alpha=alpha, + adjust=adjust, + min_periods=min_periods, + ignore_nulls=ignore_nulls, + ) + ) + def mean(self) -> Self: """ Get mean value. diff --git a/narwhals/series.py b/narwhals/series.py index 6f5223202..46ffe3ee1 100644 --- a/narwhals/series.py +++ b/narwhals/series.py @@ -383,6 +383,38 @@ def name(self) -> str: """ return self._compliant_series.name # type: ignore[no-any-return] + def ewm_mean( + self: Self, + *, + com: float | None = None, + span: float | None = None, + half_life: float | None = None, + alpha: float | None = None, + adjust: bool = True, + min_periods: int = 1, + ignore_nulls: bool = False, + ) -> Self: + """ + Compute exponentially-weighted moving average. + + Arguments: + com: Specify decay in terms of center of mass, $\\gamma$, with $\\alpha = \\frac{1}{1+\\gamma}\\;\\forall\\;\\gamma\\geq0$ + span: Specify decay in terms of span, $\\theta$, with $\\alpha = \\frac{2}{\\theta + 1} \\; \\forall \\; \\theta \\geq 1$ + + Examples + """ + return self._from_compliant_series( + self._compliant_series.ewm_mean( + com=com, + span=span, + half_life=half_life, + alpha=alpha, + adjust=adjust, + min_periods=min_periods, + ignore_nulls=ignore_nulls, + ) + ) + def cast( self, dtype: Any, From 3d1e4661e69590219af2900e82beace43d7dc8e8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dea=20Mar=C3=ADa=20L=C3=A9on?= Date: Wed, 30 Oct 2024 18:41:55 +0100 Subject: [PATCH 02/25] wip --- docs/javascripts/katex.js | 10 ++++++++++ 1 file changed, 10 insertions(+) create mode 100644 docs/javascripts/katex.js diff --git a/docs/javascripts/katex.js b/docs/javascripts/katex.js new file mode 100644 index 000000000..3828300a7 --- /dev/null +++ b/docs/javascripts/katex.js @@ -0,0 +1,10 @@ +document$.subscribe(({ body }) => { + renderMathInElement(body, { + delimiters: [ + { left: "$$", right: "$$", display: true }, + { left: "$", right: "$", display: false }, + { left: "\\(", right: "\\)", display: false }, + { left: "\\[", right: "\\]", display: true } + ], + }) + }) \ No newline at end of file From ac0c3f78e7c14e1b8e0cc855568bc9cb66f3cea0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dea=20Mar=C3=ADa=20L=C3=A9on?= Date: Wed, 30 Oct 2024 19:28:22 +0100 Subject: [PATCH 03/25] latex works --- narwhals/series.py | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/narwhals/series.py b/narwhals/series.py index 46ffe3ee1..9c2671cbb 100644 --- a/narwhals/series.py +++ b/narwhals/series.py @@ -400,6 +400,36 @@ def ewm_mean( Arguments: com: Specify decay in terms of center of mass, $\\gamma$, with $\\alpha = \\frac{1}{1+\\gamma}\\;\\forall\\;\\gamma\\geq0$ span: Specify decay in terms of span, $\\theta$, with $\\alpha = \\frac{2}{\\theta + 1} \\; \\forall \\; \\theta \\geq 1$ + half_life: Specify decay in terms of half-life, $\\tau$, with $\\alpha = 1 - \exp \left\{ \\frac{ -\ln(2) }{ \\tau } \\right\} \; \\forall \; \\tau > 0$ + alpha: Specify smoothing factor alpha directly, $0 < \\alpha \leq 1$. + adjust: Divide by decaying adjustment factor in beginning periods to account for imbalance in relative weightings + + - When `adjust=True` (the default) the EW function is calculated + using weights $w_i = (1 - \\alpha)^i$ + - When `adjust=False` the EW function is calculated recursively by + $$ + y_0=x_0 + $$ + $$ + y_t = (1 - \\alpha)y_{t - 1} + \\alpha x_t + $$ + min_periods: Minimum number of observations in window required to have a value + (otherwise result is null). + ignore_nulls: Ignore missing values when calculating weights. + + - When `ignore_nulls=False` (default), weights are based on absolute + positions. + For example, the weights of $x_0$ and $x_2$ used in + calculating the final weighted average of $[x_0, None, x_2]$ are + $(1-\\alpha)^2$ and $1$ if `adjust=True`, and + $(1-\\alpha)^2$ and $\\alpha$ if `adjust=False`. + + - When `ignore_nulls=True`, weights are based + on relative positions. For example, the weights of + $x_0$ and $x_2$ used in calculating the final weighted + average of $[x_0, None, x_2]$ are + $1-\\alpha$ and $1$ if `adjust=True`, + and $1-\\alpha$ and $\\alpha$ if `adjust=False`. Examples """ From 14dd1c5968718d43d7d8f9441d309e2a43d89481 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dea=20Mar=C3=ADa=20L=C3=A9on?= Date: Thu, 31 Oct 2024 10:51:04 +0100 Subject: [PATCH 04/25] doc test series --- narwhals/series.py | 40 +++++++++++++++++++++++++++++++++++----- 1 file changed, 35 insertions(+), 5 deletions(-) diff --git a/narwhals/series.py b/narwhals/series.py index 9c2671cbb..6f6f783b4 100644 --- a/narwhals/series.py +++ b/narwhals/series.py @@ -398,10 +398,10 @@ def ewm_mean( Compute exponentially-weighted moving average. Arguments: - com: Specify decay in terms of center of mass, $\\gamma$, with $\\alpha = \\frac{1}{1+\\gamma}\\;\\forall\\;\\gamma\\geq0$ - span: Specify decay in terms of span, $\\theta$, with $\\alpha = \\frac{2}{\\theta + 1} \\; \\forall \\; \\theta \\geq 1$ - half_life: Specify decay in terms of half-life, $\\tau$, with $\\alpha = 1 - \exp \left\{ \\frac{ -\ln(2) }{ \\tau } \\right\} \; \\forall \; \\tau > 0$ - alpha: Specify smoothing factor alpha directly, $0 < \\alpha \leq 1$. + com: Specify decay in terms of center of mass, $\\gamma$, with
$\\alpha = \\frac{1}{1+\\gamma}\\forall\\gamma\\geq0$ + span: Specify decay in terms of span, $\\theta$, with
$\\alpha = \\frac{2}{\\theta + 1} \\forall \\theta \\geq 1$ + half_life: Specify decay in terms of half-life, $\\tau$, with
$\\alpha = 1 - \\exp \\left\\{ \\frac{ -\\ln(2) }{ \\tau } \\right\\} \\forall \\tau > 0$ + alpha: Specify smoothing factor alpha directly, $0 < \\alpha \\leq 1$. adjust: Divide by decaying adjustment factor in beginning periods to account for imbalance in relative weightings - When `adjust=True` (the default) the EW function is calculated @@ -431,7 +431,37 @@ def ewm_mean( $1-\\alpha$ and $1$ if `adjust=True`, and $1-\\alpha$ and $\\alpha$ if `adjust=False`. - Examples + Examples: + >>> import pandas as pd + >>> import polars as pl + >>> import narwhals as nw + >>> data = [1, 2, 3] + >>> s_pd = pd.Series(name="a", data=data) + >>> s_pl = pl.Series(name="a", values=data) + + We define a library agnostic function: + + >>> @nw.narwhalify + ... def func(s): + ... return s.ewm_mean(com=1, ignore_nulls=False) + + We can then pass either pandas or Polars to `func`: + + >>> func(s_pd) + 0 1.000000 + 1 1.666667 + 2 2.428571 + Name: a, dtype: float64 + + >>> func(s_pl) # doctest: +NORMALIZE_WHITESPACE + shape: (3,) + Series: 'a' [f64] + [ + 1.0 + 1.666667 + 2.428571 + ] + """ return self._from_compliant_series( self._compliant_series.ewm_mean( From a4b5bd754591ac6e89406ef65a32c585e977a0fb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dea=20Mar=C3=ADa=20L=C3=A9on?= Date: Thu, 31 Oct 2024 15:10:21 +0100 Subject: [PATCH 05/25] expr docstring --- docs/api-reference/expr.md | 1 + narwhals/expr.py | 69 ++++++++++++++++++++++++++++++++++++++ narwhals/series.py | 29 ++++++++-------- 3 files changed, 84 insertions(+), 15 deletions(-) diff --git a/docs/api-reference/expr.md b/docs/api-reference/expr.md index 7188b2c36..092bce725 100644 --- a/docs/api-reference/expr.md +++ b/docs/api-reference/expr.md @@ -14,6 +14,7 @@ - cum_sum - diff - drop_nulls + - ewm_mean - fill_null - filter - gather_every diff --git a/narwhals/expr.py b/narwhals/expr.py index 3846897bd..1b9c689e5 100644 --- a/narwhals/expr.py +++ b/narwhals/expr.py @@ -415,6 +415,75 @@ def ewm_mean( min_periods: int = 1, ignore_nulls: bool = False, ) -> Self: + r""" + Compute exponentially-weighted moving average. + + Arguments: + com: Specify decay in terms of center of mass, $\gamma$, with
$\alpha = \frac{1}{1+\gamma}\forall\gamma\geq0$ + span: Specify decay in terms of span, $\theta$, with
$\alpha = \frac{2}{\theta + 1} \forall \theta \geq 1$ + half_life: Specify decay in terms of half-life, $\tau$, with
$\alpha = 1 - \exp \left\{ \frac{ -\ln(2) }{ \tau } \right\} \forall \tau > 0$ + alpha: Specify smoothing factor alpha directly, $0 < \alpha \leq 1$. + adjust: Divide by decaying adjustment factor in beginning periods to account for imbalance in relative weightings + + - When `adjust=True` (the default) the EW function is calculated + using weights $w_i = (1 - \alpha)^i$ + - When `adjust=False` the EW function is calculated recursively by + $$ + y_0=x_0 + $$ + $$ + y_t = (1 - \alpha)y_{t - 1} + \alpha x_t + $$ + min_periods: Minimum number of observations in window required to have a value, (otherwise result is null). + ignore_nulls: Ignore missing values when calculating weights. + + - When `ignore_nulls=False` (default), weights are based on absolute + positions. + For example, the weights of $x_0$ and $x_2$ used in + calculating the final weighted average of $[x_0, None, x_2]$ are + $(1-\alpha)^2$ and $1$ if `adjust=True`, and + $(1-\alpha)^2$ and $\alpha$ if `adjust=False`. + + - When `ignore_nulls=True`, weights are based + on relative positions. For example, the weights of + $x_0$ and $x_2$ used in calculating the final weighted + average of $[x_0, None, x_2]$ are + $1-\alpha$ and $1$ if `adjust=True`, + and $1-\alpha$ and $\alpha$ if `adjust=False`. + + Examples: + >>> import pandas as pd + >>> import polars as pl + >>> import narwhals as nw + >>> data = {"a": [1, 2, 3]} + >>> df_pd = pd.DataFrame(data) + >>> df_pl = pl.DataFrame(data) + + We define a library agnostic function: + + >>> @nw.narwhalify + ... def func(df): + ... return df.select(nw.col("a").ewm_mean(com=1, ignore_nulls=False)) + + We can then pass either pandas or Polars to `func`: + + + + + + >>> func(df_pl) # doctest: +NORMALIZE_WHITESPACE + shape: (3, 1) + ┌──────────┐ + │ a │ + │ --- │ + │ f64 │ + ╞══════════╡ + │ 1.0 │ + │ 1.666667 │ + │ 2.428571 │ + └──────────┘ + + """ return self.__class__( lambda plx: self._call(plx).ewm_mean( com=com, diff --git a/narwhals/series.py b/narwhals/series.py index 6f6f783b4..d5fa9b85d 100644 --- a/narwhals/series.py +++ b/narwhals/series.py @@ -394,42 +394,41 @@ def ewm_mean( min_periods: int = 1, ignore_nulls: bool = False, ) -> Self: - """ + r""" Compute exponentially-weighted moving average. Arguments: - com: Specify decay in terms of center of mass, $\\gamma$, with
$\\alpha = \\frac{1}{1+\\gamma}\\forall\\gamma\\geq0$ - span: Specify decay in terms of span, $\\theta$, with
$\\alpha = \\frac{2}{\\theta + 1} \\forall \\theta \\geq 1$ - half_life: Specify decay in terms of half-life, $\\tau$, with
$\\alpha = 1 - \\exp \\left\\{ \\frac{ -\\ln(2) }{ \\tau } \\right\\} \\forall \\tau > 0$ - alpha: Specify smoothing factor alpha directly, $0 < \\alpha \\leq 1$. + com: Specify decay in terms of center of mass, $\gamma$, with
$\alpha = \frac{1}{1+\gamma}\forall\gamma\geq0$ + span: Specify decay in terms of span, $\theta$, with
$\alpha = \frac{2}{\theta + 1} \forall \theta \geq 1$ + half_life: Specify decay in terms of half-life, $\tau$, with
$\alpha = 1 - \exp \left\{ \frac{ -\ln(2) }{ \tau } \right\} \forall \tau > 0$ + alpha: Specify smoothing factor alpha directly, $0 < \alpha \leq 1$. adjust: Divide by decaying adjustment factor in beginning periods to account for imbalance in relative weightings - When `adjust=True` (the default) the EW function is calculated - using weights $w_i = (1 - \\alpha)^i$ - - When `adjust=False` the EW function is calculated recursively by + using weights $w_i = (1 - \alpha)^i$ + - When `adjust=False` the EW function is calculated recursively by $$ y_0=x_0 $$ $$ - y_t = (1 - \\alpha)y_{t - 1} + \\alpha x_t + y_t = (1 - \alpha)y_{t - 1} + \alpha x_t $$ - min_periods: Minimum number of observations in window required to have a value - (otherwise result is null). + min_periods: Minimum number of observations in window required to have a value (otherwise result is null). ignore_nulls: Ignore missing values when calculating weights. - When `ignore_nulls=False` (default), weights are based on absolute positions. For example, the weights of $x_0$ and $x_2$ used in calculating the final weighted average of $[x_0, None, x_2]$ are - $(1-\\alpha)^2$ and $1$ if `adjust=True`, and - $(1-\\alpha)^2$ and $\\alpha$ if `adjust=False`. + $(1-\alpha)^2$ and $1$ if `adjust=True`, and + $(1-\alpha)^2$ and $\alpha$ if `adjust=False`. - When `ignore_nulls=True`, weights are based on relative positions. For example, the weights of $x_0$ and $x_2$ used in calculating the final weighted average of $[x_0, None, x_2]$ are - $1-\\alpha$ and $1$ if `adjust=True`, - and $1-\\alpha$ and $\\alpha$ if `adjust=False`. + $1-\alpha$ and $1$ if `adjust=True`, + and $1-\alpha$ and $\alpha$ if `adjust=False`. Examples: >>> import pandas as pd @@ -452,7 +451,7 @@ def ewm_mean( 1 1.666667 2 2.428571 Name: a, dtype: float64 - + >>> func(s_pl) # doctest: +NORMALIZE_WHITESPACE shape: (3,) Series: 'a' [f64] From 686e33ca5cc687e4440a9f860c3b8b8e38afc5aa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dea=20Mar=C3=ADa=20L=C3=A9on?= Date: Thu, 31 Oct 2024 15:21:19 +0100 Subject: [PATCH 06/25] added to pandaslikeexpr --- narwhals/_pandas_like/expr.py | 23 +++++++++++++++++++++++ narwhals/expr.py | 2 +- 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/narwhals/_pandas_like/expr.py b/narwhals/_pandas_like/expr.py index a58597eea..a55afa4f0 100644 --- a/narwhals/_pandas_like/expr.py +++ b/narwhals/_pandas_like/expr.py @@ -263,6 +263,29 @@ def is_in(self, other: Any) -> Self: def arg_true(self) -> Self: return reuse_series_implementation(self, "arg_true") + def ewm_mean( + self, + *, + com: float | None = None, + span: float | None = None, + half_life: float | None = None, + alpha: float | None = None, + adjust: bool = True, + min_periods: int = 1, + ignore_nulls: bool = False, + ) -> Self: + return reuse_series_implementation( + self, + "ewm_mean", + com=com, + span=span, + half_life=half_life, + alpha=alpha, + adjust=adjust, + min_periods=min_periods, + ignore_nulls=ignore_nulls, + ) + def filter(self, *predicates: Any) -> Self: plx = self.__narwhals_namespace__() other = plx.all_horizontal(*predicates) diff --git a/narwhals/expr.py b/narwhals/expr.py index 1b9c689e5..6b028b555 100644 --- a/narwhals/expr.py +++ b/narwhals/expr.py @@ -468,7 +468,7 @@ def ewm_mean( We can then pass either pandas or Polars to `func`: - + >>> func(df_pd) >>> func(df_pl) # doctest: +NORMALIZE_WHITESPACE From 9113a9d9efa0dff1a878cc1f158750370e47046f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dea=20Mar=C3=ADa=20L=C3=A9on?= Date: Fri, 1 Nov 2024 15:35:44 +0100 Subject: [PATCH 07/25] added wip test --- tests/expr_and_series/ewm_test.py | 33 +++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) create mode 100644 tests/expr_and_series/ewm_test.py diff --git a/tests/expr_and_series/ewm_test.py b/tests/expr_and_series/ewm_test.py new file mode 100644 index 000000000..296465235 --- /dev/null +++ b/tests/expr_and_series/ewm_test.py @@ -0,0 +1,33 @@ +from __future__ import annotations + +import pytest + +import narwhals.stable.v1 as nw +from tests.utils import Constructor +from tests.utils import ConstructorEager +from tests.utils import assert_equal_data + +data = {"a": [1, 1, 2], "b": [1, 2, 3]} + + +def test_ewm_mean_expr(constructor: Constructor) -> None: + if "pyarrow_" in str(constructor) or "dask" in str(constructor): # remove + pytest.skip() + + df = nw.from_native(constructor(data)) + result = df.select(nw.col("a", "b").ewm_mean(com=1)) + expected = { + "a": [1.0, 1.0, 1.5714285714285714], + "b": [1.0, 1.6666666666666667, 2.4285714285714284], + } + assert_equal_data(result, expected) + + +def test_ewm_mean_series(constructor_eager: ConstructorEager) -> None: + if "pyarrow_" in str(constructor_eager) or "daks" in str(constructor_eager): # remove + pytest.skip() + + series = nw.from_native(constructor_eager(data), eager_only=True)["a"] + result = series.ewm_mean(com=1) + expected = {"a": [1.0, 1.0, 1.5714285714285714]} + assert_equal_data({"a": result}, expected) From 212b78a17af41cc1a6cd53825236cca49f6b7536 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dea=20Mar=C3=ADa=20L=C3=A9on?= Date: Fri, 1 Nov 2024 16:17:25 +0100 Subject: [PATCH 08/25] wip --- tests/expr_and_series/ewm_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/expr_and_series/ewm_test.py b/tests/expr_and_series/ewm_test.py index 296465235..ac7f89970 100644 --- a/tests/expr_and_series/ewm_test.py +++ b/tests/expr_and_series/ewm_test.py @@ -24,7 +24,7 @@ def test_ewm_mean_expr(constructor: Constructor) -> None: def test_ewm_mean_series(constructor_eager: ConstructorEager) -> None: - if "pyarrow_" in str(constructor_eager) or "daks" in str(constructor_eager): # remove + if "pyarrow_" in str(constructor_eager): # remove pytest.skip() series = nw.from_native(constructor_eager(data), eager_only=True)["a"] From e5b948629ca1f8f277dae444a87b2b6127d39d26 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dea=20Mar=C3=ADa=20L=C3=A9on?= Date: Sat, 2 Nov 2024 18:48:09 +0100 Subject: [PATCH 09/25] added dask not implemented error test --- narwhals/_dask/expr.py | 14 ++++++++++++++ narwhals/expr.py | 3 +++ narwhals/series.py | 3 +++ tests/expr_and_series/ewm_test.py | 16 +++++++++++++++- 4 files changed, 35 insertions(+), 1 deletion(-) diff --git a/narwhals/_dask/expr.py b/narwhals/_dask/expr.py index db29f6c4d..7b21c9f0e 100644 --- a/narwhals/_dask/expr.py +++ b/narwhals/_dask/expr.py @@ -462,6 +462,20 @@ def round(self, decimals: int) -> Self: returns_scalar=False, ) + def ewm_mean( + self: Self, + *, + com: float | None = None, + span: float | None = None, + half_life: float | None = None, + alpha: float | None = None, + adjust: bool = True, + min_periods: int = 1, + ignore_nulls: bool = False, + ) -> NoReturn: + msg = "`Expr.ewm_mean` is not supported for the Dask backend" + raise NotImplementedError(msg) + def unique(self) -> NoReturn: # We can't (yet?) allow methods which modify the index msg = "`Expr.unique` is not supported for the Dask backend. Please use `LazyFrame.unique` instead." diff --git a/narwhals/expr.py b/narwhals/expr.py index 6e667694b..68744a49a 100644 --- a/narwhals/expr.py +++ b/narwhals/expr.py @@ -448,6 +448,9 @@ def ewm_mean( $1-\alpha$ and $1$ if `adjust=True`, and $1-\alpha$ and $\alpha$ if `adjust=False`. + Returns: + Expr + Examples: >>> import pandas as pd >>> import polars as pl diff --git a/narwhals/series.py b/narwhals/series.py index f2eab8369..e1dbd437c 100644 --- a/narwhals/series.py +++ b/narwhals/series.py @@ -430,6 +430,9 @@ def ewm_mean( $1-\alpha$ and $1$ if `adjust=True`, and $1-\alpha$ and $\alpha$ if `adjust=False`. + Returns: + Series + Examples: >>> import pandas as pd >>> import polars as pl diff --git a/tests/expr_and_series/ewm_test.py b/tests/expr_and_series/ewm_test.py index 7b9893c0c..d016d3fb0 100644 --- a/tests/expr_and_series/ewm_test.py +++ b/tests/expr_and_series/ewm_test.py @@ -1,5 +1,6 @@ from __future__ import annotations +import pandas as pd import pytest import narwhals.stable.v1 as nw @@ -11,7 +12,7 @@ def test_ewm_mean_expr(constructor: Constructor) -> None: - if "pyarrow_" in str(constructor) or "dask" in str(constructor): # remove + if any(x in str(constructor) for x in ("pyarrow_", "dask")): # remove pytest.skip() df = nw.from_native(constructor(data)) @@ -56,3 +57,16 @@ def test_ewm_mean_expr_adjust( } assert_equal_data(result, expected) + + +def test_ewm_mean_dask_raise() -> None: + pytest.importorskip("dask") + pytest.importorskip("dask_expr", exc_type=ImportError) + import dask.dataframe as dd + + df = nw.from_native(dd.from_pandas(pd.DataFrame({"a": [1, 2, 3]}))) + with pytest.raises( + NotImplementedError, + match="`Expr.ewm_mean` is not supported for the Dask backend", + ): + df.select(nw.col("a").ewm_mean(com=1)) From 1dfab2ce61ca0a3485a48f6ceda540878cebefb3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dea=20Mar=C3=ADa=20L=C3=A9on?= Date: Sun, 3 Nov 2024 15:13:37 +0100 Subject: [PATCH 10/25] added test with nulls --- narwhals/expr.py | 7 +-- tests/expr_and_series/ewm_test.py | 72 +++++++++++++++++++++++++++---- 2 files changed, 68 insertions(+), 11 deletions(-) diff --git a/narwhals/expr.py b/narwhals/expr.py index 68744a49a..da24ef6ee 100644 --- a/narwhals/expr.py +++ b/narwhals/expr.py @@ -467,9 +467,11 @@ def ewm_mean( We can then pass either pandas or Polars to `func`: - >>> func(df_pd) - + a + 0 1.000000 + 1 1.666667 + 2 2.428571 >>> func(df_pl) # doctest: +NORMALIZE_WHITESPACE shape: (3, 1) @@ -482,7 +484,6 @@ def ewm_mean( │ 1.666667 │ │ 2.428571 │ └──────────┘ - """ return self.__class__( lambda plx: self._call(plx).ewm_mean( diff --git a/tests/expr_and_series/ewm_test.py b/tests/expr_and_series/ewm_test.py index d016d3fb0..468288d62 100644 --- a/tests/expr_and_series/ewm_test.py +++ b/tests/expr_and_series/ewm_test.py @@ -11,9 +11,9 @@ data = {"a": [1, 1, 2], "b": [1, 2, 3]} -def test_ewm_mean_expr(constructor: Constructor) -> None: - if any(x in str(constructor) for x in ("pyarrow_", "dask")): # remove - pytest.skip() +def test_ewm_mean_expr(request: pytest.FixtureRequest, constructor: Constructor) -> None: + if any(x in str(constructor) for x in ("pyarrow_table_", "dask")): + request.applymarker(pytest.mark.xfail) df = nw.from_native(constructor(data)) result = df.select(nw.col("a", "b").ewm_mean(com=1)) @@ -24,9 +24,11 @@ def test_ewm_mean_expr(constructor: Constructor) -> None: assert_equal_data(result, expected) -def test_ewm_mean_series(constructor_eager: ConstructorEager) -> None: - if "pyarrow_" in str(constructor_eager): # remove - pytest.skip() +def test_ewm_mean_series( + request: pytest.FixtureRequest, constructor_eager: ConstructorEager +) -> None: + if "pyarrow_table_" in str(constructor_eager): + request.applymarker(pytest.mark.xfail) series = nw.from_native(constructor_eager(data), eager_only=True)["a"] result = series.ewm_mean(com=1) @@ -36,11 +38,12 @@ def test_ewm_mean_series(constructor_eager: ConstructorEager) -> None: @pytest.mark.parametrize("adjust", [True, False]) def test_ewm_mean_expr_adjust( + request: pytest.FixtureRequest, constructor: Constructor, adjust: bool, # noqa: FBT001 ) -> None: - if "pyarrow_" in str(constructor) or "dask" in str(constructor): # remove - pytest.skip() + if any(x in str(constructor) for x in ("pyarrow_table_", "dask")): + request.applymarker(pytest.mark.xfail) df = nw.from_native(constructor(data)) result = df.select(nw.col("a", "b").ewm_mean(com=1, adjust=adjust)) @@ -70,3 +73,56 @@ def test_ewm_mean_dask_raise() -> None: match="`Expr.ewm_mean` is not supported for the Dask backend", ): df.select(nw.col("a").ewm_mean(com=1)) + + +@pytest.mark.parametrize("ignore_nulls", [True, False]) +def test_ewm_mean_nulls( + request: pytest.FixtureRequest, + ignore_nulls: bool, # noqa: FBT001 + constructor: Constructor, +) -> None: + if any(x in str(constructor) for x in ("pyarrow_table_", "dask")): + request.applymarker(pytest.mark.xfail) + + df = nw.from_native( + constructor({"a": [2, 4, None, 3, 5], "b": [2.0, 4.0, float("nan"), 3.0, 5.0]}) + ) + result = df.select(nw.col("a", "b").ewm_mean(com=1, ignore_nulls=ignore_nulls)) + + constructor_type = "polars" if "polars" in str(constructor) else "other" + + expected_results: dict[tuple[str, bool], dict[str, list[float | None]]] = { + ("polars", False): { + "a": [2.0, 3.3333333333333335, None, 3.090909090909091, 4.222222222222222], + "b": [2.0, 3.3333333333333335, float("nan"), float("nan"), float("nan")], + }, + ("polars", True): { + "a": [2.0, 3.3333333333333335, None, 3.142857142857143, 4.133333333333334], + "b": [2.0, 3.3333333333333335, float("nan"), float("nan"), float("nan")], + }, + ("other", False): { + "a": [2.000000, 3.333333, 3.333333, 3.090909, 4.222222], + "b": [2.000000, 3.333333, 3.333333, 3.090909, 4.222222], + }, + ("other", True): { + "a": [ + 2.0, + 3.3333333333333335, + 3.3333333333333335, + 3.142857142857143, + 4.133333333333334, + ], + "b": [ + 2.0, + 3.3333333333333335, + 3.3333333333333335, + 3.142857142857143, + 4.133333333333334, + ], + }, + } + + expected: dict[str, list[float | None]] = expected_results[ + (constructor_type, ignore_nulls) + ] + assert_equal_data(result, expected) From 6f738cd6284a3fe57bcb27843668b3a3457a1d2b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dea=20Mar=C3=ADa=20L=C3=A9on?= Date: Sun, 3 Nov 2024 18:07:04 +0100 Subject: [PATCH 11/25] example with nulls --- narwhals/expr.py | 31 +++++++++++++++++++ narwhals/series.py | 27 +++++++++++++++++ tests/expr_and_series/ewm_test.py | 50 ++++++++++++++++++++----------- 3 files changed, 90 insertions(+), 18 deletions(-) diff --git a/narwhals/expr.py b/narwhals/expr.py index da24ef6ee..405e26ae7 100644 --- a/narwhals/expr.py +++ b/narwhals/expr.py @@ -484,6 +484,37 @@ def ewm_mean( │ 1.666667 │ │ 2.428571 │ └──────────┘ + + pandas and Polars handle nulls differently. So, When calculating ewm over + a sequence with null values, leading to distinct results: + + >>> data = {"a": [2.0, 4.0, None, 3.0, float("nan"), 3.0]} + >>> df_pd2 = pd.DataFrame(data) + >>> df_pl2 = pl.DataFrame(data) + + >>> func(df_pd2) + a + 0 2.000000 + 1 3.333333 + 2 3.333333 + 3 3.090909 + 4 3.090909 + 5 3.023256 + + >>> func(df_pl2) # doctest: +NORMALIZE_WHITESPACE + shape: (6, 1) + ┌──────────┐ + │ a │ + │ --- │ + │ f64 │ + ╞══════════╡ + │ 2.0 │ + │ 3.333333 │ + │ null │ + │ 3.090909 │ + │ NaN │ + │ NaN │ + └──────────┘ """ return self.__class__( lambda plx: self._call(plx).ewm_mean( diff --git a/narwhals/series.py b/narwhals/series.py index e1dbd437c..54e273a45 100644 --- a/narwhals/series.py +++ b/narwhals/series.py @@ -464,6 +464,33 @@ def ewm_mean( 2.428571 ] + pandas and Polars handle nulls differently. So, When calculating ewm over + a sequence with null values, leading to distinct results: + + >>> data = [2.0, 4.0, None, 3.0, float("nan"), 3.0] + >>> s_pd2 = pd.Series(name="a", data=data) + >>> s_pl2 = pl.Series(name="a", values=data) + + >>> func(s_pd2) + 0 2.000000 + 1 3.333333 + 2 3.333333 + 3 3.090909 + 4 3.090909 + 5 3.023256 + Name: a, dtype: float64 + + >>> func(s_pl2) # doctest: +NORMALIZE_WHITESPACE + shape: (6,) + Series: 'a' [f64] + [ + 2.0 + 3.333333 + null + 3.090909 + NaN + NaN + ] """ return self._from_compliant_series( self._compliant_series.ewm_mean( diff --git a/tests/expr_and_series/ewm_test.py b/tests/expr_and_series/ewm_test.py index 468288d62..7afd31efe 100644 --- a/tests/expr_and_series/ewm_test.py +++ b/tests/expr_and_series/ewm_test.py @@ -81,28 +81,48 @@ def test_ewm_mean_nulls( ignore_nulls: bool, # noqa: FBT001 constructor: Constructor, ) -> None: + # When calculating ewm over a sequence with null values, pandas and Polars handle nulls differently, + # leading to distinct results: + # For non-null entries in the sequence, both exclude null values during ewm calculation, + # and both produce the same result for these non-null entries. The weights for these values are determined by + # the ignore_nulls parameter. + # For null values, however, Pandas calculates an ewm value, while Polars returns null for these positions. + # + # Also, NaN values are treated differently between the two libraries: + # In Polars, NaN values are not treated as nulls, so a NaN entry results in NaN for that entry and + # for all subsequent entries in the EWM calculation. + # In pandas, NaN values are considered nulls, so Pandas computes an EWM value for these entries instead. + if any(x in str(constructor) for x in ("pyarrow_table_", "dask")): request.applymarker(pytest.mark.xfail) - df = nw.from_native( - constructor({"a": [2, 4, None, 3, 5], "b": [2.0, 4.0, float("nan"), 3.0, 5.0]}) - ) - result = df.select(nw.col("a", "b").ewm_mean(com=1, ignore_nulls=ignore_nulls)) - + df = nw.from_native(constructor({"a": [2.0, 4.0, None, 3.0, float("nan"), 3.0]})) + result = df.select(nw.col("a").ewm_mean(com=1, ignore_nulls=ignore_nulls)) constructor_type = "polars" if "polars" in str(constructor) else "other" expected_results: dict[tuple[str, bool], dict[str, list[float | None]]] = { ("polars", False): { - "a": [2.0, 3.3333333333333335, None, 3.090909090909091, 4.222222222222222], - "b": [2.0, 3.3333333333333335, float("nan"), float("nan"), float("nan")], + "a": [ + 2.0, + 3.3333333333333335, + None, + 3.090909090909091, + float("nan"), + float("nan"), + ], }, ("polars", True): { - "a": [2.0, 3.3333333333333335, None, 3.142857142857143, 4.133333333333334], - "b": [2.0, 3.3333333333333335, float("nan"), float("nan"), float("nan")], + "a": [ + 2.0, + 3.3333333333333335, + None, + 3.142857142857143, + float("nan"), + float("nan"), + ], }, ("other", False): { - "a": [2.000000, 3.333333, 3.333333, 3.090909, 4.222222], - "b": [2.000000, 3.333333, 3.333333, 3.090909, 4.222222], + "a": [2.000000, 3.333333, 3.333333, 3.090909, 3.090909, 3.023256], }, ("other", True): { "a": [ @@ -110,14 +130,8 @@ def test_ewm_mean_nulls( 3.3333333333333335, 3.3333333333333335, 3.142857142857143, - 4.133333333333334, - ], - "b": [ - 2.0, - 3.3333333333333335, - 3.3333333333333335, 3.142857142857143, - 4.133333333333334, + 3.066666666666667, ], }, } From 73cc573df3ab3b59836e24bfa0c7a56c98196424 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dea=20Mar=C3=ADa=20L=C3=A9on?= Date: Sun, 3 Nov 2024 18:18:30 +0100 Subject: [PATCH 12/25] fixed mkdocs issue --- narwhals/expr.py | 56 +++++++++++++++++++++++----------------------- narwhals/series.py | 4 ++-- 2 files changed, 30 insertions(+), 30 deletions(-) diff --git a/narwhals/expr.py b/narwhals/expr.py index 405e26ae7..18b5d196d 100644 --- a/narwhals/expr.py +++ b/narwhals/expr.py @@ -485,36 +485,36 @@ def ewm_mean( │ 2.428571 │ └──────────┘ - pandas and Polars handle nulls differently. So, When calculating ewm over - a sequence with null values, leading to distinct results: + pandas and Polars handle nulls differently. So, calculating ewm over + a sequence with null values leads to distinct results: - >>> data = {"a": [2.0, 4.0, None, 3.0, float("nan"), 3.0]} - >>> df_pd2 = pd.DataFrame(data) - >>> df_pl2 = pl.DataFrame(data) + >>> data = {"a": [2.0, 4.0, None, 3.0, float("nan"), 3.0]} + >>> df_pd2 = pd.DataFrame(data) + >>> df_pl2 = pl.DataFrame(data) - >>> func(df_pd2) - a - 0 2.000000 - 1 3.333333 - 2 3.333333 - 3 3.090909 - 4 3.090909 - 5 3.023256 - - >>> func(df_pl2) # doctest: +NORMALIZE_WHITESPACE - shape: (6, 1) - ┌──────────┐ - │ a │ - │ --- │ - │ f64 │ - ╞══════════╡ - │ 2.0 │ - │ 3.333333 │ - │ null │ - │ 3.090909 │ - │ NaN │ - │ NaN │ - └──────────┘ + >>> func(df_pd2) + a + 0 2.000000 + 1 3.333333 + 2 3.333333 + 3 3.090909 + 4 3.090909 + 5 3.023256 + + >>> func(df_pl2) # doctest: +NORMALIZE_WHITESPACE + shape: (6, 1) + ┌──────────┐ + │ a │ + │ --- │ + │ f64 │ + ╞══════════╡ + │ 2.0 │ + │ 3.333333 │ + │ null │ + │ 3.090909 │ + │ NaN │ + │ NaN │ + └──────────┘ """ return self.__class__( lambda plx: self._call(plx).ewm_mean( diff --git a/narwhals/series.py b/narwhals/series.py index 54e273a45..72b2b8aac 100644 --- a/narwhals/series.py +++ b/narwhals/series.py @@ -464,8 +464,8 @@ def ewm_mean( 2.428571 ] - pandas and Polars handle nulls differently. So, When calculating ewm over - a sequence with null values, leading to distinct results: + pandas and Polars handle nulls differently. So, calculating ewm over + a sequence with null values leads to distinct results: >>> data = [2.0, 4.0, None, 3.0, float("nan"), 3.0] >>> s_pd2 = pd.Series(name="a", data=data) From 5cd48336dfb737381c251ab808f0420ef71bf1f9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dea=20Mar=C3=ADa=20L=C3=A9on?= Date: Wed, 6 Nov 2024 10:10:52 +0100 Subject: [PATCH 13/25] Match polars' None in input --- narwhals/_pandas_like/series.py | 3 +- narwhals/expr.py | 4 +-- narwhals/series.py | 9 +++--- tests/expr_and_series/ewm_test.py | 51 ++++++------------------------- 4 files changed, 18 insertions(+), 49 deletions(-) diff --git a/narwhals/_pandas_like/series.py b/narwhals/_pandas_like/series.py index 008d4eb4d..0329494ae 100644 --- a/narwhals/_pandas_like/series.py +++ b/narwhals/_pandas_like/series.py @@ -186,10 +186,11 @@ def ewm_mean( ignore_nulls: bool = False, ) -> PandasLikeSeries: ser = self._native_series + mask_na = ser.isna() result = ser.ewm( com, span, half_life, alpha, min_periods, adjust, ignore_na=ignore_nulls ).mean() - + result[mask_na] = None return self._from_native_series(result) def scatter(self, indices: int | Sequence[int], values: Any) -> Self: diff --git a/narwhals/expr.py b/narwhals/expr.py index 18b5d196d..1f953a140 100644 --- a/narwhals/expr.py +++ b/narwhals/expr.py @@ -496,9 +496,9 @@ def ewm_mean( a 0 2.000000 1 3.333333 - 2 3.333333 + 2 NaN 3 3.090909 - 4 3.090909 + 4 NaN 5 3.023256 >>> func(df_pl2) # doctest: +NORMALIZE_WHITESPACE diff --git a/narwhals/series.py b/narwhals/series.py index 72b2b8aac..afb25db3a 100644 --- a/narwhals/series.py +++ b/narwhals/series.py @@ -464,8 +464,9 @@ def ewm_mean( 2.428571 ] - pandas and Polars handle nulls differently. So, calculating ewm over - a sequence with null values leads to distinct results: + pandas and Polars handle NaN differently. So, calculating ewm over + a sequence with null values leads to distinct results. Narwhals + matches Polars' results when "None" is in the input. >>> data = [2.0, 4.0, None, 3.0, float("nan"), 3.0] >>> s_pd2 = pd.Series(name="a", data=data) @@ -474,9 +475,9 @@ def ewm_mean( >>> func(s_pd2) 0 2.000000 1 3.333333 - 2 3.333333 + 2 NaN 3 3.090909 - 4 3.090909 + 4 NaN 5 3.023256 Name: a, dtype: float64 diff --git a/tests/expr_and_series/ewm_test.py b/tests/expr_and_series/ewm_test.py index 7afd31efe..272131f72 100644 --- a/tests/expr_and_series/ewm_test.py +++ b/tests/expr_and_series/ewm_test.py @@ -81,62 +81,29 @@ def test_ewm_mean_nulls( ignore_nulls: bool, # noqa: FBT001 constructor: Constructor, ) -> None: - # When calculating ewm over a sequence with null values, pandas and Polars handle nulls differently, - # leading to distinct results: - # For non-null entries in the sequence, both exclude null values during ewm calculation, - # and both produce the same result for these non-null entries. The weights for these values are determined by - # the ignore_nulls parameter. - # For null values, however, Pandas calculates an ewm value, while Polars returns null for these positions. - # - # Also, NaN values are treated differently between the two libraries: - # In Polars, NaN values are not treated as nulls, so a NaN entry results in NaN for that entry and - # for all subsequent entries in the EWM calculation. - # In pandas, NaN values are considered nulls, so Pandas computes an EWM value for these entries instead. - if any(x in str(constructor) for x in ("pyarrow_table_", "dask")): request.applymarker(pytest.mark.xfail) - df = nw.from_native(constructor({"a": [2.0, 4.0, None, 3.0, float("nan"), 3.0]})) + df = nw.from_native(constructor({"a": [2.0, 4.0, None, 3.0]})) result = df.select(nw.col("a").ewm_mean(com=1, ignore_nulls=ignore_nulls)) - constructor_type = "polars" if "polars" in str(constructor) else "other" - expected_results: dict[tuple[str, bool], dict[str, list[float | None]]] = { - ("polars", False): { + if ignore_nulls: + expected = { "a": [ 2.0, 3.3333333333333335, - None, - 3.090909090909091, float("nan"), - float("nan"), - ], - }, - ("polars", True): { - "a": [ - 2.0, - 3.3333333333333335, - None, 3.142857142857143, - float("nan"), - float("nan"), ], - }, - ("other", False): { - "a": [2.000000, 3.333333, 3.333333, 3.090909, 3.090909, 3.023256], - }, - ("other", True): { + } + else: + expected = { "a": [ 2.0, 3.3333333333333335, - 3.3333333333333335, - 3.142857142857143, - 3.142857142857143, - 3.066666666666667, + float("nan"), + 3.090909090909091, ], - }, - } + } - expected: dict[str, list[float | None]] = expected_results[ - (constructor_type, ignore_nulls) - ] assert_equal_data(result, expected) From 19da3a374999b4676dd9090754bad0d31316145e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dea=20Mar=C3=ADa=20L=C3=A9on?= Date: Wed, 6 Nov 2024 18:45:17 +0100 Subject: [PATCH 14/25] polars version --- tests/expr_and_series/ewm_test.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/expr_and_series/ewm_test.py b/tests/expr_and_series/ewm_test.py index 272131f72..0a94da01f 100644 --- a/tests/expr_and_series/ewm_test.py +++ b/tests/expr_and_series/ewm_test.py @@ -4,6 +4,7 @@ import pytest import narwhals.stable.v1 as nw +from tests.utils import POLARS_VERSION from tests.utils import Constructor from tests.utils import ConstructorEager from tests.utils import assert_equal_data @@ -75,6 +76,9 @@ def test_ewm_mean_dask_raise() -> None: df.select(nw.col("a").ewm_mean(com=1)) +@pytest.mark.skipif( + POLARS_VERSION < (0, 20, 13), reason="Polars changed how it handles None" +) @pytest.mark.parametrize("ignore_nulls", [True, False]) def test_ewm_mean_nulls( request: pytest.FixtureRequest, From 6cc0a960689173c4cd08267eccc9b697af409aaf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dea=20Mar=C3=ADa=20L=C3=A9on?= Date: Wed, 6 Nov 2024 19:00:14 +0100 Subject: [PATCH 15/25] polars version again --- tests/expr_and_series/ewm_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/expr_and_series/ewm_test.py b/tests/expr_and_series/ewm_test.py index 0a94da01f..63325249a 100644 --- a/tests/expr_and_series/ewm_test.py +++ b/tests/expr_and_series/ewm_test.py @@ -77,7 +77,7 @@ def test_ewm_mean_dask_raise() -> None: @pytest.mark.skipif( - POLARS_VERSION < (0, 20, 13), reason="Polars changed how it handles None" + POLARS_VERSION <= (0, 20, 13), reason="Polars changed how it handles None" ) @pytest.mark.parametrize("ignore_nulls", [True, False]) def test_ewm_mean_nulls( From afb3ed3b63e4f690ba9607837fa01629bbeebedd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dea=20Mar=C3=ADa=20L=C3=A9on?= Date: Wed, 6 Nov 2024 19:10:40 +0100 Subject: [PATCH 16/25] again --- tests/expr_and_series/ewm_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/expr_and_series/ewm_test.py b/tests/expr_and_series/ewm_test.py index 63325249a..45cea6e0d 100644 --- a/tests/expr_and_series/ewm_test.py +++ b/tests/expr_and_series/ewm_test.py @@ -77,7 +77,7 @@ def test_ewm_mean_dask_raise() -> None: @pytest.mark.skipif( - POLARS_VERSION <= (0, 20, 13), reason="Polars changed how it handles None" + POLARS_VERSION <= (0, 20, 31), reason="Polars changed how it handles None" ) @pytest.mark.parametrize("ignore_nulls", [True, False]) def test_ewm_mean_nulls( From ee2e91648a83365d29a5b4353d389ddbee609a66 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dea=20Mar=C3=ADa=20L=C3=A9on?= Date: Wed, 6 Nov 2024 19:36:34 +0100 Subject: [PATCH 17/25] wip --- tests/expr_and_series/ewm_test.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/tests/expr_and_series/ewm_test.py b/tests/expr_and_series/ewm_test.py index 45cea6e0d..a57326808 100644 --- a/tests/expr_and_series/ewm_test.py +++ b/tests/expr_and_series/ewm_test.py @@ -76,16 +76,15 @@ def test_ewm_mean_dask_raise() -> None: df.select(nw.col("a").ewm_mean(com=1)) -@pytest.mark.skipif( - POLARS_VERSION <= (0, 20, 31), reason="Polars changed how it handles None" -) @pytest.mark.parametrize("ignore_nulls", [True, False]) def test_ewm_mean_nulls( request: pytest.FixtureRequest, ignore_nulls: bool, # noqa: FBT001 constructor: Constructor, ) -> None: - if any(x in str(constructor) for x in ("pyarrow_table_", "dask")): + if any(x in str(constructor) for x in ("pyarrow_table_", "dask")) or ( + "polars" in str(constructor) and POLARS_VERSION <= (0, 20, 31) + ): request.applymarker(pytest.mark.xfail) df = nw.from_native(constructor({"a": [2.0, 4.0, None, 3.0]})) From 7f872cfc9894af96f6f23879be7f268fb4950d77 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dea=20Mar=C3=ADa=20L=C3=A9on?= Date: Wed, 6 Nov 2024 19:43:14 +0100 Subject: [PATCH 18/25] add modin to xfail --- tests/expr_and_series/ewm_test.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/expr_and_series/ewm_test.py b/tests/expr_and_series/ewm_test.py index a57326808..75923aea1 100644 --- a/tests/expr_and_series/ewm_test.py +++ b/tests/expr_and_series/ewm_test.py @@ -13,7 +13,7 @@ def test_ewm_mean_expr(request: pytest.FixtureRequest, constructor: Constructor) -> None: - if any(x in str(constructor) for x in ("pyarrow_table_", "dask")): + if any(x in str(constructor) for x in ("pyarrow_table_", "dask", "modin")): request.applymarker(pytest.mark.xfail) df = nw.from_native(constructor(data)) @@ -28,7 +28,7 @@ def test_ewm_mean_expr(request: pytest.FixtureRequest, constructor: Constructor) def test_ewm_mean_series( request: pytest.FixtureRequest, constructor_eager: ConstructorEager ) -> None: - if "pyarrow_table_" in str(constructor_eager): + if any(x in str(constructor_eager) for x in ("pyarrow_table_", "modin")): request.applymarker(pytest.mark.xfail) series = nw.from_native(constructor_eager(data), eager_only=True)["a"] @@ -43,7 +43,7 @@ def test_ewm_mean_expr_adjust( constructor: Constructor, adjust: bool, # noqa: FBT001 ) -> None: - if any(x in str(constructor) for x in ("pyarrow_table_", "dask")): + if any(x in str(constructor) for x in ("pyarrow_table_", "dask", "modin")): request.applymarker(pytest.mark.xfail) df = nw.from_native(constructor(data)) @@ -82,7 +82,7 @@ def test_ewm_mean_nulls( ignore_nulls: bool, # noqa: FBT001 constructor: Constructor, ) -> None: - if any(x in str(constructor) for x in ("pyarrow_table_", "dask")) or ( + if any(x in str(constructor) for x in ("pyarrow_table_", "dask", "modin")) or ( "polars" in str(constructor) and POLARS_VERSION <= (0, 20, 31) ): request.applymarker(pytest.mark.xfail) From 3cbfe536b22d51a6899cf6b07fdc38c8be1582f2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dea=20Mar=C3=ADa=20L=C3=A9on?= Date: Sun, 10 Nov 2024 20:32:39 +0100 Subject: [PATCH 19/25] ewm_mean not implemented yet for old Polars --- narwhals/_polars/expr.py | 30 ++++++++++++++++++++++++++++++ narwhals/_polars/series.py | 27 +++++++++++++++++++++++++++ tests/expr_and_series/ewm_test.py | 25 ++++++++++++++++++++++--- 3 files changed, 79 insertions(+), 3 deletions(-) diff --git a/narwhals/_polars/expr.py b/narwhals/_polars/expr.py index 8a4c93736..d70f354b1 100644 --- a/narwhals/_polars/expr.py +++ b/narwhals/_polars/expr.py @@ -7,6 +7,7 @@ from narwhals._polars.utils import extract_native from narwhals._polars.utils import narwhals_to_native_dtype from narwhals.utils import Implementation +from narwhals.utils import parse_version if TYPE_CHECKING: from typing_extensions import Self @@ -41,6 +42,35 @@ def cast(self, dtype: DType) -> Self: dtype = narwhals_to_native_dtype(dtype, self._dtypes) return self._from_native_expr(expr.cast(dtype)) + def ewm_mean( + self: Self, + *, + com: float | None = None, + span: float | None = None, + half_life: float | None = None, + alpha: float | None = None, + adjust: bool = True, + min_periods: int = 1, + ignore_nulls: bool = False, + ) -> Self: + import polars as pl # ignore-banned-import() + + if parse_version(pl.__version__) <= (0, 20, 31): # pragma: no cover + msg = "`ewm_mean` not implemented for polars older than 0.20.31" + raise NotImplementedError(msg) + expr = self._native_expr + return self._from_native_expr( + expr.ewm_mean( + com=com, + span=span, + half_life=half_life, + alpha=alpha, + adjust=adjust, + min_periods=min_periods, + ignore_nulls=ignore_nulls, + ) + ) + def __eq__(self, other: object) -> Self: # type: ignore[override] return self._from_native_expr(self._native_expr.__eq__(extract_native(other))) diff --git a/narwhals/_polars/series.py b/narwhals/_polars/series.py index 9ca2f7040..165edabea 100644 --- a/narwhals/_polars/series.py +++ b/narwhals/_polars/series.py @@ -196,6 +196,33 @@ def to_dummies( result, backend_version=self._backend_version, dtypes=self._dtypes ) + def ewm_mean( + self: Self, + *, + com: float | None = None, + span: float | None = None, + half_life: float | None = None, + alpha: float | None = None, + adjust: bool = True, + min_periods: int = 1, + ignore_nulls: bool = False, + ) -> Self: + if self._backend_version < (0, 20, 31): # pragma: no cover + msg = "`ewm_mean` not implemented for polars older than 0.20.31" + raise NotImplementedError(msg) + expr = self._native_series + return self._from_native_series( + expr.ewm_mean( + com=com, + span=span, + half_life=half_life, + alpha=alpha, + adjust=adjust, + min_periods=min_periods, + ignore_nulls=ignore_nulls, + ) + ) + def sort(self, *, descending: bool = False, nulls_last: bool = False) -> Self: if self._backend_version < (0, 20, 6): # pragma: no cover result = self._native_series.sort(descending=descending) diff --git a/tests/expr_and_series/ewm_test.py b/tests/expr_and_series/ewm_test.py index 75923aea1..a9af8ae93 100644 --- a/tests/expr_and_series/ewm_test.py +++ b/tests/expr_and_series/ewm_test.py @@ -13,7 +13,9 @@ def test_ewm_mean_expr(request: pytest.FixtureRequest, constructor: Constructor) -> None: - if any(x in str(constructor) for x in ("pyarrow_table_", "dask", "modin")): + if any(x in str(constructor) for x in ("pyarrow_table_", "dask", "modin")) or ( + "polars" in str(constructor) and POLARS_VERSION <= (0, 20, 31) + ): request.applymarker(pytest.mark.xfail) df = nw.from_native(constructor(data)) @@ -28,7 +30,9 @@ def test_ewm_mean_expr(request: pytest.FixtureRequest, constructor: Constructor) def test_ewm_mean_series( request: pytest.FixtureRequest, constructor_eager: ConstructorEager ) -> None: - if any(x in str(constructor_eager) for x in ("pyarrow_table_", "modin")): + if any(x in str(constructor_eager) for x in ("pyarrow_table_", "modin")) or ( + "polars" in str(constructor_eager) and POLARS_VERSION <= (0, 20, 31) + ): request.applymarker(pytest.mark.xfail) series = nw.from_native(constructor_eager(data), eager_only=True)["a"] @@ -43,7 +47,9 @@ def test_ewm_mean_expr_adjust( constructor: Constructor, adjust: bool, # noqa: FBT001 ) -> None: - if any(x in str(constructor) for x in ("pyarrow_table_", "dask", "modin")): + if any(x in str(constructor) for x in ("pyarrow_table_", "dask", "modin")) or ( + "polars" in str(constructor) and POLARS_VERSION <= (0, 20, 31) + ): request.applymarker(pytest.mark.xfail) df = nw.from_native(constructor(data)) @@ -76,6 +82,19 @@ def test_ewm_mean_dask_raise() -> None: df.select(nw.col("a").ewm_mean(com=1)) +def test_ewm_mean_old_raise() -> None: + pytest.importorskip("polars") + import polars as pl + + df = nw.from_native(pl.DataFrame(data)) + if POLARS_VERSION <= (0, 20, 31): + with pytest.raises( + NotImplementedError, + match="ewm_mean` not implemented for polars older than 0.20.31", + ): + df.select(nw.col("a").ewm_mean(com=1)) + + @pytest.mark.parametrize("ignore_nulls", [True, False]) def test_ewm_mean_nulls( request: pytest.FixtureRequest, From a6c45253291004425d87e925de592006901164ad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dea=20Mar=C3=ADa=20L=C3=A9on?= Date: Sun, 10 Nov 2024 20:47:55 +0100 Subject: [PATCH 20/25] remove unused test --- tests/expr_and_series/ewm_test.py | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/tests/expr_and_series/ewm_test.py b/tests/expr_and_series/ewm_test.py index a9af8ae93..cc2cd9721 100644 --- a/tests/expr_and_series/ewm_test.py +++ b/tests/expr_and_series/ewm_test.py @@ -82,19 +82,6 @@ def test_ewm_mean_dask_raise() -> None: df.select(nw.col("a").ewm_mean(com=1)) -def test_ewm_mean_old_raise() -> None: - pytest.importorskip("polars") - import polars as pl - - df = nw.from_native(pl.DataFrame(data)) - if POLARS_VERSION <= (0, 20, 31): - with pytest.raises( - NotImplementedError, - match="ewm_mean` not implemented for polars older than 0.20.31", - ): - df.select(nw.col("a").ewm_mean(com=1)) - - @pytest.mark.parametrize("ignore_nulls", [True, False]) def test_ewm_mean_nulls( request: pytest.FixtureRequest, From a67aef099ff1b7f1ae5884177927889e9aae25cd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dea=20Mar=C3=ADa=20L=C3=A9on?= Date: Tue, 12 Nov 2024 11:18:27 +0100 Subject: [PATCH 21/25] parametrize expected --- docs/css/code_select.css | 7 +++ tests/expr_and_series/ewm_test.py | 82 ++++++++++++++++++------------- 2 files changed, 55 insertions(+), 34 deletions(-) create mode 100644 docs/css/code_select.css diff --git a/docs/css/code_select.css b/docs/css/code_select.css new file mode 100644 index 000000000..259cf11c5 --- /dev/null +++ b/docs/css/code_select.css @@ -0,0 +1,7 @@ +.highlight .gp, .highlight .go { /* Generic.Prompt, Generic.Output */ + user-select: none; + -webkit-user-select: none; /* Safari */ + -moz-user-select: none; /* Firefox */ + -ms-user-select: none; /* Internet Explorer/Edge */ + color: red; +} \ No newline at end of file diff --git a/tests/expr_and_series/ewm_test.py b/tests/expr_and_series/ewm_test.py index cc2cd9721..35c902f04 100644 --- a/tests/expr_and_series/ewm_test.py +++ b/tests/expr_and_series/ewm_test.py @@ -41,11 +41,30 @@ def test_ewm_mean_series( assert_equal_data({"a": result}, expected) -@pytest.mark.parametrize("adjust", [True, False]) +@pytest.mark.parametrize( + ("adjust", "expected"), + [ + ( + True, + { + "a": [1.0, 1.0, 1.5714285714285714], + "b": [1.0, 1.6666666666666667, 2.4285714285714284], + }, + ), + ( + False, + { + "a": [1.0, 1.0, 1.5], + "b": [1.0, 1.5, 2.25], + }, + ), + ], +) def test_ewm_mean_expr_adjust( request: pytest.FixtureRequest, constructor: Constructor, adjust: bool, # noqa: FBT001 + expected: dict[str, list[float]], ) -> None: if any(x in str(constructor) for x in ("pyarrow_table_", "dask", "modin")) or ( "polars" in str(constructor) and POLARS_VERSION <= (0, 20, 31) @@ -54,18 +73,6 @@ def test_ewm_mean_expr_adjust( df = nw.from_native(constructor(data)) result = df.select(nw.col("a", "b").ewm_mean(com=1, adjust=adjust)) - - if adjust: - expected = { - "a": [1.0, 1.0, 1.5714285714285714], - "b": [1.0, 1.6666666666666667, 2.4285714285714284], - } - else: - expected = { - "a": [1.0, 1.0, 1.5], - "b": [1.0, 1.5, 2.25], - } - assert_equal_data(result, expected) @@ -82,10 +89,37 @@ def test_ewm_mean_dask_raise() -> None: df.select(nw.col("a").ewm_mean(com=1)) -@pytest.mark.parametrize("ignore_nulls", [True, False]) +@pytest.mark.parametrize( + ("ignore_nulls", "expected"), + [ + ( + True, + { + "a": [ + 2.0, + 3.3333333333333335, + float("nan"), + 3.142857142857143, + ] + }, + ), + ( + False, + { + "a": [ + 2.0, + 3.3333333333333335, + float("nan"), + 3.090909090909091, + ] + }, + ), + ], +) def test_ewm_mean_nulls( request: pytest.FixtureRequest, ignore_nulls: bool, # noqa: FBT001 + expected: dict[str, list[float]], constructor: Constructor, ) -> None: if any(x in str(constructor) for x in ("pyarrow_table_", "dask", "modin")) or ( @@ -95,24 +129,4 @@ def test_ewm_mean_nulls( df = nw.from_native(constructor({"a": [2.0, 4.0, None, 3.0]})) result = df.select(nw.col("a").ewm_mean(com=1, ignore_nulls=ignore_nulls)) - - if ignore_nulls: - expected = { - "a": [ - 2.0, - 3.3333333333333335, - float("nan"), - 3.142857142857143, - ], - } - else: - expected = { - "a": [ - 2.0, - 3.3333333333333335, - float("nan"), - 3.090909090909091, - ], - } - assert_equal_data(result, expected) From eddceb422809b594cc0af8dc8391dd1b59fdf057 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dea=20Mar=C3=ADa=20L=C3=A9on?= Date: Tue, 12 Nov 2024 15:26:33 +0100 Subject: [PATCH 22/25] test parameters --- tests/expr_and_series/ewm_test.py | 39 +++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/tests/expr_and_series/ewm_test.py b/tests/expr_and_series/ewm_test.py index 35c902f04..928791584 100644 --- a/tests/expr_and_series/ewm_test.py +++ b/tests/expr_and_series/ewm_test.py @@ -130,3 +130,42 @@ def test_ewm_mean_nulls( df = nw.from_native(constructor({"a": [2.0, 4.0, None, 3.0]})) result = df.select(nw.col("a").ewm_mean(com=1, ignore_nulls=ignore_nulls)) assert_equal_data(result, expected) + + +def test_ewm_mean_params( + request: pytest.FixtureRequest, + constructor: Constructor, +) -> None: + if any(x in str(constructor) for x in ("pyarrow_table_", "dask", "modin")) or ( + "polars" in str(constructor) and POLARS_VERSION <= (0, 20, 31) + ): + request.applymarker(pytest.mark.xfail) + + df = nw.from_native(constructor({"a": [2, 5, 3]})) + expected = {"a": [2.0, 4.0, 3.4285714285714284]} + assert_equal_data( + df.select(nw.col("a").ewm_mean(alpha=0.5, adjust=True, ignore_nulls=True)), + expected, + ) + + expected = {"a": [2.0, 4.500000000000001, 3.2903225806451615]} + assert_equal_data( + df.select(nw.col("a").ewm_mean(span=1.5, adjust=True, ignore_nulls=True)), + expected, + ) + + expected = {"a": [2.0, 3.1101184251576903, 3.0693702609187237]} + assert_equal_data( + df.select(nw.col("a").ewm_mean(half_life=1.5, adjust=False)), expected + ) + + expected = {"a": [float("nan"), 4.0, 3.4285714285714284]} + assert_equal_data( + df.select( + nw.col("a").ewm_mean(alpha=0.5, adjust=True, min_periods=2, ignore_nulls=True) + ), + expected, + ) + + with pytest.raises(ValueError, match="mutually exclusive"): + df.select(nw.col("a").ewm_mean(span=1.5, half_life=0.75, ignore_nulls=False)) From 3f2a26d6d8993a67d55d8a799db9b9496950450f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dea=20Mar=C3=ADa=20L=C3=A9on?= Date: Mon, 18 Nov 2024 16:27:33 +0100 Subject: [PATCH 23/25] added warning --- narwhals/expr.py | 8 +- narwhals/series.py | 7 +- narwhals/stable/v1/__init__.py | 267 ++++++++++++++++++++++++++++++ tests/expr_and_series/ewm_test.py | 18 ++ 4 files changed, 295 insertions(+), 5 deletions(-) diff --git a/narwhals/expr.py b/narwhals/expr.py index 52e7657ac..ea7271984 100644 --- a/narwhals/expr.py +++ b/narwhals/expr.py @@ -429,8 +429,11 @@ def ewm_mean( min_periods: int = 1, ignore_nulls: bool = False, ) -> Self: - r""" - Compute exponentially-weighted moving average. + r"""Compute exponentially-weighted moving average. + + !!! warning + This functionality is considered **unstable**. It may be changed at any point + without it being considered a breaking change. Arguments: com: Specify decay in terms of center of mass, $\gamma$, with
$\alpha = \frac{1}{1+\gamma}\forall\gamma\geq0$ @@ -438,7 +441,6 @@ def ewm_mean( half_life: Specify decay in terms of half-life, $\tau$, with
$\alpha = 1 - \exp \left\{ \frac{ -\ln(2) }{ \tau } \right\} \forall \tau > 0$ alpha: Specify smoothing factor alpha directly, $0 < \alpha \leq 1$. adjust: Divide by decaying adjustment factor in beginning periods to account for imbalance in relative weightings - - When `adjust=True` (the default) the EW function is calculated using weights $w_i = (1 - \alpha)^i$ - When `adjust=False` the EW function is calculated recursively by diff --git a/narwhals/series.py b/narwhals/series.py index 21bfa7fa4..7e60b1581 100644 --- a/narwhals/series.py +++ b/narwhals/series.py @@ -387,8 +387,11 @@ def ewm_mean( min_periods: int = 1, ignore_nulls: bool = False, ) -> Self: - r""" - Compute exponentially-weighted moving average. + r"""Compute exponentially-weighted moving average. + + !!! warning + This functionality is considered **unstable**. It may be changed at any point + without it being considered a breaking change. Arguments: com: Specify decay in terms of center of mass, $\gamma$, with
$\alpha = \frac{1}{1+\gamma}\forall\gamma\geq0$ diff --git a/narwhals/stable/v1/__init__.py b/narwhals/stable/v1/__init__.py index d35ecd434..174115813 100644 --- a/narwhals/stable/v1/__init__.py +++ b/narwhals/stable/v1/__init__.py @@ -493,6 +493,137 @@ def value_counts( sort=sort, parallel=parallel, name=name, normalize=normalize ) + def ewm_mean( + self: Self, + *, + com: float | None = None, + span: float | None = None, + half_life: float | None = None, + alpha: float | None = None, + adjust: bool = True, + min_periods: int = 1, + ignore_nulls: bool = False, + ) -> Self: + r"""Compute exponentially-weighted moving average. + + !!! warning + This functionality is considered **unstable**. It may be changed at any point + without it being considered a breaking change. + + Arguments: + com: Specify decay in terms of center of mass, $\gamma$, with
$\alpha = \frac{1}{1+\gamma}\forall\gamma\geq0$ + span: Specify decay in terms of span, $\theta$, with
$\alpha = \frac{2}{\theta + 1} \forall \theta \geq 1$ + half_life: Specify decay in terms of half-life, $\tau$, with
$\alpha = 1 - \exp \left\{ \frac{ -\ln(2) }{ \tau } \right\} \forall \tau > 0$ + alpha: Specify smoothing factor alpha directly, $0 < \alpha \leq 1$. + adjust: Divide by decaying adjustment factor in beginning periods to account for imbalance in relative weightings + + - When `adjust=True` (the default) the EW function is calculated + using weights $w_i = (1 - \alpha)^i$ + - When `adjust=False` the EW function is calculated recursively by + $$ + y_0=x_0 + $$ + $$ + y_t = (1 - \alpha)y_{t - 1} + \alpha x_t + $$ + min_periods: Minimum number of observations in window required to have a value (otherwise result is null). + ignore_nulls: Ignore missing values when calculating weights. + + - When `ignore_nulls=False` (default), weights are based on absolute + positions. + For example, the weights of $x_0$ and $x_2$ used in + calculating the final weighted average of $[x_0, None, x_2]$ are + $(1-\alpha)^2$ and $1$ if `adjust=True`, and + $(1-\alpha)^2$ and $\alpha$ if `adjust=False`. + + - When `ignore_nulls=True`, weights are based + on relative positions. For example, the weights of + $x_0$ and $x_2$ used in calculating the final weighted + average of $[x_0, None, x_2]$ are + $1-\alpha$ and $1$ if `adjust=True`, + and $1-\alpha$ and $\alpha$ if `adjust=False`. + + Returns: + Series + + Examples: + >>> import pandas as pd + >>> import polars as pl + >>> import narwhals as nw + >>> data = [1, 2, 3] + >>> s_pd = pd.Series(name="a", data=data) + >>> s_pl = pl.Series(name="a", values=data) + + We define a library agnostic function: + + >>> @nw.narwhalify + ... def func(s): + ... return s.ewm_mean(com=1, ignore_nulls=False) + + We can then pass either pandas or Polars to `func`: + + >>> func(s_pd) + 0 1.000000 + 1 1.666667 + 2 2.428571 + Name: a, dtype: float64 + + >>> func(s_pl) # doctest: +NORMALIZE_WHITESPACE + shape: (3,) + Series: 'a' [f64] + [ + 1.0 + 1.666667 + 2.428571 + ] + + pandas and Polars handle NaN differently. So, calculating ewm over + a sequence with null values leads to distinct results. Narwhals + matches Polars' results when "None" is in the input. + + >>> data = [2.0, 4.0, None, 3.0, float("nan"), 3.0] + >>> s_pd2 = pd.Series(name="a", data=data) + >>> s_pl2 = pl.Series(name="a", values=data) + + >>> func(s_pd2) + 0 2.000000 + 1 3.333333 + 2 NaN + 3 3.090909 + 4 NaN + 5 3.023256 + Name: a, dtype: float64 + + >>> func(s_pl2) # doctest: +NORMALIZE_WHITESPACE + shape: (6,) + Series: 'a' [f64] + [ + 2.0 + 3.333333 + null + 3.090909 + NaN + NaN + ] + """ + from narwhals.exceptions import NarwhalsUnstableWarning + from narwhals.utils import find_stacklevel + + msg = ( + "`Series.ewm_mean` is being called from the stable API although considered " + "an unstable feature." + ) + warn(message=msg, category=NarwhalsUnstableWarning, stacklevel=find_stacklevel()) + return super().ewm_mean( + com=com, + span=span, + half_life=half_life, + alpha=alpha, + adjust=adjust, + min_periods=min_periods, + ignore_nulls=ignore_nulls, + ) + def rolling_sum( self: Self, window_size: int, @@ -589,6 +720,142 @@ class Expr(NwExpr): def _l1_norm(self) -> Self: return super()._taxicab_norm() + def ewm_mean( + self: Self, + *, + com: float | None = None, + span: float | None = None, + half_life: float | None = None, + alpha: float | None = None, + adjust: bool = True, + min_periods: int = 1, + ignore_nulls: bool = False, + ) -> Self: + r"""Compute exponentially-weighted moving average. + + !!! warning + This functionality is considered **unstable**. It may be changed at any point + without it being considered a breaking change. + + Arguments: + com: Specify decay in terms of center of mass, $\gamma$, with
$\alpha = \frac{1}{1+\gamma}\forall\gamma\geq0$ + span: Specify decay in terms of span, $\theta$, with
$\alpha = \frac{2}{\theta + 1} \forall \theta \geq 1$ + half_life: Specify decay in terms of half-life, $\tau$, with
$\alpha = 1 - \exp \left\{ \frac{ -\ln(2) }{ \tau } \right\} \forall \tau > 0$ + alpha: Specify smoothing factor alpha directly, $0 < \alpha \leq 1$. + adjust: Divide by decaying adjustment factor in beginning periods to account for imbalance in relative weightings + + - When `adjust=True` (the default) the EW function is calculated + using weights $w_i = (1 - \alpha)^i$ + - When `adjust=False` the EW function is calculated recursively by + $$ + y_0=x_0 + $$ + $$ + y_t = (1 - \alpha)y_{t - 1} + \alpha x_t + $$ + min_periods: Minimum number of observations in window required to have a value, (otherwise result is null). + ignore_nulls: Ignore missing values when calculating weights. + + - When `ignore_nulls=False` (default), weights are based on absolute + positions. + For example, the weights of $x_0$ and $x_2$ used in + calculating the final weighted average of $[x_0, None, x_2]$ are + $(1-\alpha)^2$ and $1$ if `adjust=True`, and + $(1-\alpha)^2$ and $\alpha$ if `adjust=False`. + + - When `ignore_nulls=True`, weights are based + on relative positions. For example, the weights of + $x_0$ and $x_2$ used in calculating the final weighted + average of $[x_0, None, x_2]$ are + $1-\alpha$ and $1$ if `adjust=True`, + and $1-\alpha$ and $\alpha$ if `adjust=False`. + + Returns: + Expr + + Examples: + >>> import pandas as pd + >>> import polars as pl + >>> import narwhals as nw + >>> data = {"a": [1, 2, 3]} + >>> df_pd = pd.DataFrame(data) + >>> df_pl = pl.DataFrame(data) + + We define a library agnostic function: + + >>> @nw.narwhalify + ... def func(df): + ... return df.select(nw.col("a").ewm_mean(com=1, ignore_nulls=False)) + + We can then pass either pandas or Polars to `func`: + + >>> func(df_pd) + a + 0 1.000000 + 1 1.666667 + 2 2.428571 + + >>> func(df_pl) # doctest: +NORMALIZE_WHITESPACE + shape: (3, 1) + ┌──────────┐ + │ a │ + │ --- │ + │ f64 │ + ╞══════════╡ + │ 1.0 │ + │ 1.666667 │ + │ 2.428571 │ + └──────────┘ + + pandas and Polars handle nulls differently. So, calculating ewm over + a sequence with null values leads to distinct results: + + >>> data = {"a": [2.0, 4.0, None, 3.0, float("nan"), 3.0]} + >>> df_pd2 = pd.DataFrame(data) + >>> df_pl2 = pl.DataFrame(data) + + >>> func(df_pd2) + a + 0 2.000000 + 1 3.333333 + 2 NaN + 3 3.090909 + 4 NaN + 5 3.023256 + + >>> func(df_pl2) # doctest: +NORMALIZE_WHITESPACE + shape: (6, 1) + ┌──────────┐ + │ a │ + │ --- │ + │ f64 │ + ╞══════════╡ + │ 2.0 │ + │ 3.333333 │ + │ null │ + │ 3.090909 │ + │ NaN │ + │ NaN │ + └──────────┘ + """ + from narwhals.exceptions import NarwhalsUnstableWarning + from narwhals.utils import find_stacklevel + + msg = ( + "`Expr.ewm_mean` is being called from the stable API although considered " + "an unstable feature." + ) + warn(message=msg, category=NarwhalsUnstableWarning, stacklevel=find_stacklevel()) + return super().ewm_mean( + com=com, + span=span, + half_life=half_life, + alpha=alpha, + adjust=adjust, + min_periods=min_periods, + ignore_nulls=ignore_nulls, + ) + def rolling_sum( self: Self, window_size: int, diff --git a/tests/expr_and_series/ewm_test.py b/tests/expr_and_series/ewm_test.py index 928791584..8f52588e9 100644 --- a/tests/expr_and_series/ewm_test.py +++ b/tests/expr_and_series/ewm_test.py @@ -12,6 +12,9 @@ data = {"a": [1, 1, 2], "b": [1, 2, 3]} +@pytest.mark.filterwarnings( + "ignore:`Expr.ewm_mean` is being called from the stable API although considered an unstable feature." +) def test_ewm_mean_expr(request: pytest.FixtureRequest, constructor: Constructor) -> None: if any(x in str(constructor) for x in ("pyarrow_table_", "dask", "modin")) or ( "polars" in str(constructor) and POLARS_VERSION <= (0, 20, 31) @@ -27,6 +30,9 @@ def test_ewm_mean_expr(request: pytest.FixtureRequest, constructor: Constructor) assert_equal_data(result, expected) +@pytest.mark.filterwarnings( + "ignore:`Series.ewm_mean` is being called from the stable API although considered an unstable feature." +) def test_ewm_mean_series( request: pytest.FixtureRequest, constructor_eager: ConstructorEager ) -> None: @@ -41,6 +47,9 @@ def test_ewm_mean_series( assert_equal_data({"a": result}, expected) +@pytest.mark.filterwarnings( + "ignore:`Expr.ewm_mean` is being called from the stable API although considered an unstable feature." +) @pytest.mark.parametrize( ("adjust", "expected"), [ @@ -76,6 +85,9 @@ def test_ewm_mean_expr_adjust( assert_equal_data(result, expected) +@pytest.mark.filterwarnings( + "ignore:`Expr.ewm_mean` is being called from the stable API although considered an unstable feature." +) def test_ewm_mean_dask_raise() -> None: pytest.importorskip("dask") pytest.importorskip("dask_expr", exc_type=ImportError) @@ -89,6 +101,9 @@ def test_ewm_mean_dask_raise() -> None: df.select(nw.col("a").ewm_mean(com=1)) +@pytest.mark.filterwarnings( + "ignore:`Expr.ewm_mean` is being called from the stable API although considered an unstable feature." +) @pytest.mark.parametrize( ("ignore_nulls", "expected"), [ @@ -132,6 +147,9 @@ def test_ewm_mean_nulls( assert_equal_data(result, expected) +@pytest.mark.filterwarnings( + "ignore:`Expr.ewm_mean` is being called from the stable API although considered an unstable feature." +) def test_ewm_mean_params( request: pytest.FixtureRequest, constructor: Constructor, From e8eb64541894779f011bcab75910c3b2fef4d4b3 Mon Sep 17 00:00:00 2001 From: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Tue, 19 Nov 2024 09:56:36 +0000 Subject: [PATCH 24/25] remove nan example, https://github.com/narwhals-dev/narwhals/issues/1401 --- narwhals/expr.py | 32 ------------------ narwhals/series.py | 30 ---------------- narwhals/stable/v1/__init__.py | 62 ---------------------------------- 3 files changed, 124 deletions(-) diff --git a/narwhals/expr.py b/narwhals/expr.py index ea7271984..ddacd4fff 100644 --- a/narwhals/expr.py +++ b/narwhals/expr.py @@ -459,7 +459,6 @@ def ewm_mean( calculating the final weighted average of $[x_0, None, x_2]$ are $(1-\alpha)^2$ and $1$ if `adjust=True`, and $(1-\alpha)^2$ and $\alpha$ if `adjust=False`. - - When `ignore_nulls=True`, weights are based on relative positions. For example, the weights of $x_0$ and $x_2$ used in calculating the final weighted @@ -503,37 +502,6 @@ def ewm_mean( │ 1.666667 │ │ 2.428571 │ └──────────┘ - - pandas and Polars handle nulls differently. So, calculating ewm over - a sequence with null values leads to distinct results: - - >>> data = {"a": [2.0, 4.0, None, 3.0, float("nan"), 3.0]} - >>> df_pd2 = pd.DataFrame(data) - >>> df_pl2 = pl.DataFrame(data) - - >>> func(df_pd2) - a - 0 2.000000 - 1 3.333333 - 2 NaN - 3 3.090909 - 4 NaN - 5 3.023256 - - >>> func(df_pl2) # doctest: +NORMALIZE_WHITESPACE - shape: (6, 1) - ┌──────────┐ - │ a │ - │ --- │ - │ f64 │ - ╞══════════╡ - │ 2.0 │ - │ 3.333333 │ - │ null │ - │ 3.090909 │ - │ NaN │ - │ NaN │ - └──────────┘ """ return self.__class__( lambda plx: self._call(plx).ewm_mean( diff --git a/narwhals/series.py b/narwhals/series.py index 7e60b1581..01829cb04 100644 --- a/narwhals/series.py +++ b/narwhals/series.py @@ -418,7 +418,6 @@ def ewm_mean( calculating the final weighted average of $[x_0, None, x_2]$ are $(1-\alpha)^2$ and $1$ if `adjust=True`, and $(1-\alpha)^2$ and $\alpha$ if `adjust=False`. - - When `ignore_nulls=True`, weights are based on relative positions. For example, the weights of $x_0$ and $x_2$ used in calculating the final weighted @@ -459,35 +458,6 @@ def ewm_mean( 1.666667 2.428571 ] - - pandas and Polars handle NaN differently. So, calculating ewm over - a sequence with null values leads to distinct results. Narwhals - matches Polars' results when "None" is in the input. - - >>> data = [2.0, 4.0, None, 3.0, float("nan"), 3.0] - >>> s_pd2 = pd.Series(name="a", data=data) - >>> s_pl2 = pl.Series(name="a", values=data) - - >>> func(s_pd2) - 0 2.000000 - 1 3.333333 - 2 NaN - 3 3.090909 - 4 NaN - 5 3.023256 - Name: a, dtype: float64 - - >>> func(s_pl2) # doctest: +NORMALIZE_WHITESPACE - shape: (6,) - Series: 'a' [f64] - [ - 2.0 - 3.333333 - null - 3.090909 - NaN - NaN - ] """ return self._from_compliant_series( self._compliant_series.ewm_mean( diff --git a/narwhals/stable/v1/__init__.py b/narwhals/stable/v1/__init__.py index 174115813..12016d80d 100644 --- a/narwhals/stable/v1/__init__.py +++ b/narwhals/stable/v1/__init__.py @@ -535,7 +535,6 @@ def ewm_mean( calculating the final weighted average of $[x_0, None, x_2]$ are $(1-\alpha)^2$ and $1$ if `adjust=True`, and $(1-\alpha)^2$ and $\alpha$ if `adjust=False`. - - When `ignore_nulls=True`, weights are based on relative positions. For example, the weights of $x_0$ and $x_2$ used in calculating the final weighted @@ -576,35 +575,6 @@ def ewm_mean( 1.666667 2.428571 ] - - pandas and Polars handle NaN differently. So, calculating ewm over - a sequence with null values leads to distinct results. Narwhals - matches Polars' results when "None" is in the input. - - >>> data = [2.0, 4.0, None, 3.0, float("nan"), 3.0] - >>> s_pd2 = pd.Series(name="a", data=data) - >>> s_pl2 = pl.Series(name="a", values=data) - - >>> func(s_pd2) - 0 2.000000 - 1 3.333333 - 2 NaN - 3 3.090909 - 4 NaN - 5 3.023256 - Name: a, dtype: float64 - - >>> func(s_pl2) # doctest: +NORMALIZE_WHITESPACE - shape: (6,) - Series: 'a' [f64] - [ - 2.0 - 3.333333 - null - 3.090909 - NaN - NaN - ] """ from narwhals.exceptions import NarwhalsUnstableWarning from narwhals.utils import find_stacklevel @@ -762,7 +732,6 @@ def ewm_mean( calculating the final weighted average of $[x_0, None, x_2]$ are $(1-\alpha)^2$ and $1$ if `adjust=True`, and $(1-\alpha)^2$ and $\alpha$ if `adjust=False`. - - When `ignore_nulls=True`, weights are based on relative positions. For example, the weights of $x_0$ and $x_2$ used in calculating the final weighted @@ -806,37 +775,6 @@ def ewm_mean( │ 1.666667 │ │ 2.428571 │ └──────────┘ - - pandas and Polars handle nulls differently. So, calculating ewm over - a sequence with null values leads to distinct results: - - >>> data = {"a": [2.0, 4.0, None, 3.0, float("nan"), 3.0]} - >>> df_pd2 = pd.DataFrame(data) - >>> df_pl2 = pl.DataFrame(data) - - >>> func(df_pd2) - a - 0 2.000000 - 1 3.333333 - 2 NaN - 3 3.090909 - 4 NaN - 5 3.023256 - - >>> func(df_pl2) # doctest: +NORMALIZE_WHITESPACE - shape: (6, 1) - ┌──────────┐ - │ a │ - │ --- │ - │ f64 │ - ╞══════════╡ - │ 2.0 │ - │ 3.333333 │ - │ null │ - │ 3.090909 │ - │ NaN │ - │ NaN │ - └──────────┘ """ from narwhals.exceptions import NarwhalsUnstableWarning from narwhals.utils import find_stacklevel From bbe2cae0dbc100fea15477e2554480925b5506ab Mon Sep 17 00:00:00 2001 From: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Tue, 19 Nov 2024 10:03:40 +0000 Subject: [PATCH 25/25] use None in test --- tests/expr_and_series/ewm_test.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/expr_and_series/ewm_test.py b/tests/expr_and_series/ewm_test.py index 8f52588e9..e541a5bfe 100644 --- a/tests/expr_and_series/ewm_test.py +++ b/tests/expr_and_series/ewm_test.py @@ -113,7 +113,7 @@ def test_ewm_mean_dask_raise() -> None: "a": [ 2.0, 3.3333333333333335, - float("nan"), + None, 3.142857142857143, ] }, @@ -124,7 +124,7 @@ def test_ewm_mean_dask_raise() -> None: "a": [ 2.0, 3.3333333333333335, - float("nan"), + None, 3.090909090909091, ] }, @@ -160,7 +160,7 @@ def test_ewm_mean_params( request.applymarker(pytest.mark.xfail) df = nw.from_native(constructor({"a": [2, 5, 3]})) - expected = {"a": [2.0, 4.0, 3.4285714285714284]} + expected: dict[str, list[float | None]] = {"a": [2.0, 4.0, 3.4285714285714284]} assert_equal_data( df.select(nw.col("a").ewm_mean(alpha=0.5, adjust=True, ignore_nulls=True)), expected, @@ -177,7 +177,7 @@ def test_ewm_mean_params( df.select(nw.col("a").ewm_mean(half_life=1.5, adjust=False)), expected ) - expected = {"a": [float("nan"), 4.0, 3.4285714285714284]} + expected = {"a": [None, 4.0, 3.4285714285714284]} assert_equal_data( df.select( nw.col("a").ewm_mean(alpha=0.5, adjust=True, min_periods=2, ignore_nulls=True)