diff --git a/narwhals/dataframe.py b/narwhals/dataframe.py index dd8f39ec5..9e90419c8 100644 --- a/narwhals/dataframe.py +++ b/narwhals/dataframe.py @@ -501,17 +501,31 @@ def __arrow_c_stream__(self: Self, requested_schema: object | None = None) -> ob pa_table = self.to_arrow() return pa_table.__arrow_c_stream__(requested_schema=requested_schema) - def lazy(self: Self, *, backend: Implementation | None = None) -> LazyFrame[Any]: + def lazy( + self: Self, + *, + backend: ModuleType | Implementation | str | None = None, + ) -> LazyFrame[Any]: """Restrict available API methods to lazy-only ones. If `backend` is specified, then a conversion between different backends might be triggered. + If a library does not support lazy execution and `backend` is not specified, then this is will only restrict the API to lazy-only operations. This is useful if you want to ensure that you write dataframe-agnostic code which all has the possibility of running entirely lazily. Arguments: + backend: specifies which lazy backend to collect to. This will be the underlying + backend for the resulting Narwhals LazyFrame. + + `backend` can be specified in various ways: + + - As `Implementation.<BACKEND>` with `BACKEND` being `DASK`, `DUCKDB` + or `POLARS`. + - As a string: `"dask"`, `"duckdb"` or `"polars"` + - Directly as a module `dask.dataframe`, `duckdb` or `polars`. backend: The (lazy) implementation to convert to. If not specified, and the given library does not support lazy execution, then this will restrict the API to lazy-only operations. 
@@ -552,19 +566,20 @@ def lazy(self: Self, *, backend: Implementation | None = None) -> LazyFrame[Any] |└───────┴───────┘ | └──────────────────┘ """ + lazy_backend = None if backend is None else Implementation.from_backend(backend) supported_lazy_backends = ( Implementation.DASK, Implementation.DUCKDB, Implementation.POLARS, ) - if backend is not None and backend not in supported_lazy_backends: + if lazy_backend is not None and lazy_backend not in supported_lazy_backends: msg = ( "Not-supported backend." - f"\n\nExpected one of {supported_lazy_backends} or `None`, got {backend}" + f"\n\nExpected one of {supported_lazy_backends} or `None`, got {lazy_backend}" ) raise ValueError(msg) return self._lazyframe( - self._compliant_frame.lazy(backend=backend), + self._compliant_frame.lazy(backend=lazy_backend), level="lazy", ) diff --git a/narwhals/stable/v1/__init__.py b/narwhals/stable/v1/__init__.py index 43ab578cd..d4281a070 100644 --- a/narwhals/stable/v1/__init__.py +++ b/narwhals/stable/v1/__init__.py @@ -167,17 +167,31 @@ def __getitem__(self: Self, item: tuple[slice, slice]) -> Self: ... def __getitem__(self: Self, item: Any) -> Any: return super().__getitem__(item) - def lazy(self: Self, *, backend: Implementation | None = None) -> LazyFrame[Any]: + def lazy( + self: Self, + *, + backend: ModuleType | Implementation | str | None = None, + ) -> LazyFrame[Any]: """Restrict available API methods to lazy-only ones. If `backend` is specified, then a conversion between different backends might be triggered. + If a library does not support lazy execution and `backend` is not specified, then this is will only restrict the API to lazy-only operations. This is useful if you want to ensure that you write dataframe-agnostic code which all has the possibility of running entirely lazily. Arguments: + backend: specifies which lazy backend to collect to. This will be the underlying + backend for the resulting Narwhals LazyFrame. 
+ + `backend` can be specified in various ways: + + - As `Implementation.<BACKEND>` with `BACKEND` being `DASK`, `DUCKDB` + or `POLARS`. + - As a string: `"dask"`, `"duckdb"` or `"polars"` + - Directly as a module `dask.dataframe`, `duckdb` or `polars`. backend: The (lazy) implementation to convert to. If not specified, and the given library does not support lazy execution, then this will restrict the API to lazy-only operations. diff --git a/tests/frame/lazy_test.py b/tests/frame/lazy_test.py index fa9d871df..64eabd95f 100644 --- a/tests/frame/lazy_test.py +++ b/tests/frame/lazy_test.py @@ -2,10 +2,15 @@ from typing import TYPE_CHECKING +import pandas as pd +import polars as pl +import pyarrow as pa import pytest import narwhals as nw import narwhals.stable.v1 as nw_v1 +from narwhals.dependencies import get_cudf +from narwhals.dependencies import get_modin from narwhals.utils import Implementation if TYPE_CHECKING: @@ -15,7 +20,7 @@ data = {"a": [1, 2, 3]} -def test_lazy(constructor_eager: ConstructorEager) -> None: +def test_lazy_to_default(constructor_eager: ConstructorEager) -> None: df = nw.from_native(constructor_eager(data), eager_only=True) result = df.lazy() assert isinstance(result, nw.LazyFrame) @@ -23,25 +28,52 @@ def test_lazy(constructor_eager: ConstructorEager) -> None: result = df.lazy() assert isinstance(result, nw_v1.LazyFrame) + if "polars" in str(constructor_eager): + expected_cls = pl.LazyFrame + elif "pandas" in str(constructor_eager): + expected_cls = pd.DataFrame + elif "modin" in str(constructor_eager): + mpd = get_modin() + expected_cls = mpd.DataFrame + elif "cudf" in str(constructor_eager): + cudf = get_cudf() + expected_cls = cudf.DataFrame + else: # pyarrow + expected_cls = pa.Table + + assert isinstance(result.to_native(), expected_cls) + @pytest.mark.parametrize( - "backend", [Implementation.POLARS, Implementation.DUCKDB, Implementation.DASK] + "backend", + [ + Implementation.POLARS, + Implementation.DUCKDB, + Implementation.DASK, + "polars", 
+ "duckdb", + "dask", + ], ) def test_lazy_backend( request: pytest.FixtureRequest, constructor_eager: ConstructorEager, - backend: Implementation, + backend: Implementation | str, ) -> None: if "modin" in str(constructor_eager): request.applymarker(pytest.mark.xfail) - if backend is Implementation.DASK: + if (backend is Implementation.DASK) or backend == "dask": pytest.importorskip("dask") - if backend is Implementation.DUCKDB: + if (backend is Implementation.DUCKDB) or backend == "duckdb": pytest.importorskip("duckdb") df = nw.from_native(constructor_eager(data), eager_only=True) result = df.lazy(backend=backend) assert isinstance(result, nw.LazyFrame) - assert result.implementation == backend + + expected = ( + Implementation.from_string(backend) if isinstance(backend, str) else backend + ) + assert result.implementation == expected def test_lazy_backend_invalid(constructor_eager: ConstructorEager) -> None: