From ed3ff6772f7e4c6c8bcb5dfa553bad91164f560c Mon Sep 17 00:00:00 2001 From: "samuel.oranyeli" Date: Mon, 17 Jun 2024 20:28:40 +1000 Subject: [PATCH 01/21] faster pivot_longer for non dot value --- janitor/polars/pivot_longer.py | 817 ++++++++++++------ .../functions/test_pivot_longer_polars.py | 141 ++- 2 files changed, 613 insertions(+), 345 deletions(-) diff --git a/janitor/polars/pivot_longer.py b/janitor/polars/pivot_longer.py index 6e7024cc7..108670419 100644 --- a/janitor/polars/pivot_longer.py +++ b/janitor/polars/pivot_longer.py @@ -2,14 +2,10 @@ from __future__ import annotations -from collections import defaultdict -from typing import Any, Iterable - from janitor.utils import check, import_message try: import polars as pl - import polars.selectors as cs from polars.type_aliases import ColumnNameOrSelector except ImportError: import_message( @@ -138,24 +134,23 @@ def pivot_longer_spec( "are not present in the source DataFrame." ) - if spec.columns[:2] != [".name", ".value"]: - raise ValueError( - "The first two columns of the spec DataFrame " - "should be '.name' and '.value', " - "with '.name' coming before '.value'." - ) + df_columns = pl.DataFrame({".name": df.columns}) - return _pivot_longer_dot_value( - df=df, - spec=spec, - ) + spec = df_columns.join(spec, on=".name", how="left") + spec = spec.select(pl.exclude(".name")) + if len(spec.columns) == 1: + return _pivot_longer_dot_value_only( + df=df, + outcome=spec, + ) + return def _pivot_longer( df: pl.DataFrame | pl.LazyFrame, index: ColumnNameOrSelector, column_names: ColumnNameOrSelector, - names_to: list | tuple | str, + names_to: list | tuple | str | None, values_to: str, names_sep: str, names_pattern: str, @@ -165,29 +160,6 @@ def _pivot_longer( Unpivots a DataFrame/LazyFrame from wide to long form. """ - ( - df, - index, - column_names, - names_to, - values_to, - names_sep, - names_pattern, - names_transform, - ) = _data_checks_pivot_longer( - df=df, - index=index, - column_names=column_names, - names_to=names_to, - values_to=values_to, - names_sep=names_sep, - names_pattern=names_pattern, - names_transform=names_transform, - ) - - if not column_names: - return df - if all((names_pattern is None, names_sep is None)): return df.melt( id_vars=index, @@ -196,50 +168,180 @@ def _pivot_longer( value_name=values_to, ) - df = df.select(pl.col(index), pl.col(column_names)) if isinstance(names_to, str): names_to = [names_to] + elif isinstance(names_to, (list, tuple)): + uniques = set() + for word in names_to: + if not isinstance(word, str): + raise TypeError( + f"'{word}' in names_to should be a string type; " + f"instead got type {type(word).__name__}" + ) + if (word in uniques) and (word != ".value"): + raise ValueError(f"'{word}' is duplicated in names_to.") + uniques.add(word) + else: + raise TypeError( + "names_to should be a string, list, or tuple; " + f"instead got type {type(names_to).__name__}" + ) + + if names_sep and names_pattern: + raise ValueError( + "Only one of names_pattern or names_sep should be provided." 
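
Before the string-splitting paths, it helps to see the trivial case this refactor keeps: with neither `names_sep` nor `names_pattern`, `pivot_longer` is a plain `melt`. A minimal sketch on toy data (the column names here are invented for illustration):

```python
import polars as pl

df = pl.DataFrame({"id": [1, 2], "ht1": [2.8, 2.1], "ht2": [3.4, 2.9]})

# No names_sep/names_pattern: pivot_longer reduces to a single melt.
out = df.melt(
    id_vars="id",
    value_vars=["ht1", "ht2"],
    variable_name="age",
    value_name="ht",
)
```
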
+ ) + + if names_sep is not None: + check("names_sep", names_sep, [str]) - spec = _pivot_longer_create_spec( + else: + check("names_pattern", names_pattern, [str]) + + check("values_to", values_to, [str]) + + if names_sep and (".value" not in names_to): + return _pivot_longer_names_sep_no_dot_value( + df=df, + index=index, + column_names=column_names, + names_to=names_to, + values_to=values_to, + names_sep=names_sep, + names_transform=names_transform, + ) + if names_pattern and (".value" not in names_to): + return _pivot_longer_names_pattern_no_dot_value( + df=df, + index=index, + column_names=column_names, + names_to=names_to, + values_to=values_to, + names_pattern=names_pattern, + names_transform=names_transform, + ) + if names_sep: + return _pivot_longer_names_sep_dot_value( + df=df, + index=index, + column_names=column_names, + names_to=names_to, + names_sep=names_sep, + names_transform=names_transform, + ) + return _pivot_longer_names_pattern_dot_value( + df=df, + index=index, column_names=column_names, names_to=names_to, - names_sep=names_sep, names_pattern=names_pattern, - values_to=values_to, names_transform=names_transform, ) - return _pivot_longer_dot_value(df=df, spec=spec) - -def _pivot_longer_create_spec( - column_names: Iterable, - names_to: Iterable, - names_sep: str | None, - names_pattern: str | None, +def _pivot_longer_names_sep_no_dot_value( + df: pl.DataFrame | pl.LazyFrame, + index: ColumnNameOrSelector, + column_names: ColumnNameOrSelector, + names_to: list | tuple, values_to: str, + names_sep: str, names_transform: pl.Expr, -) -> pl.DataFrame: +) -> pl.DataFrame | pl.LazyFrame: """ - This is where the spec DataFrame is created, - before the transformation to long form. + flip polars Frame to long form, + if names_sep and no .value in names_to. """ - spec = pl.DataFrame({".name": column_names}) - if names_sep is not None: - expression = ( - pl.col(".name") + variable_name = "".join(df.columns) + # the implode approach is used here + # for efficiency + # it is much faster to extract the relevant strings + # on a smaller set and then explode + # than to melt into the full data and then extract + outcome = ( + df.select(pl.all().implode()) + .melt( + id_vars=index, + value_vars=column_names, + variable_name=variable_name, + value_name=values_to, + ) + .with_columns( + pl.col(variable_name) .str.split(by=names_sep) - .list.to_struct(n_field_strategy="max_width") - .alias("extract") + .list.to_struct(n_field_strategy="max_width"), ) + ) + if isinstance(df, pl.LazyFrame): + extract = outcome.select(variable_name).collect().to_series(0) else: - expression = ( - pl.col(".name") - .str.extract_groups(pattern=names_pattern) - .alias("extract") + extract = outcome.get_column(variable_name) + + len_names_to = len(names_to) + + len_fields = len(extract.struct.fields) + + if len_names_to != len_fields: + raise ValueError( + f"The length of names_to does not match " + "the number of fields extracted. " + f"The length of names_to is {len_names_to} " + "while the number of fields extracted is " + f"{len_fields}." 
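
The implode-first trick that this new function leans on can be shown in isolation. A sketch with toy data, assuming a 2024-era polars where `melt` and `list.to_struct` behave as used in the patch: the string work happens on one row per source column, and the frame is only blown back up at the end.

```python
import polars as pl

df = pl.DataFrame({"id": [1, 2], "ht_1": [2.8, 2.1], "ht_2": [3.4, 2.9]})

out = (
    df.select(pl.all().implode())  # one row; every column becomes a list
    .melt(id_vars="id", variable_name="name", value_name="value")
    .with_columns(
        pl.col("name")
        .str.split(by="_")
        .list.to_struct(n_field_strategy="max_width")
    )
    .unnest("name")          # split fragments become real columns
    .explode("id", "value")  # restore one row per observation
)
```
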
) - spec = spec.with_columns(expression) - len_fields = len(spec.get_column("extract").struct.fields) + + expression = pl.col(variable_name).struct.rename_fields(names=names_to) + outcome = outcome.with_columns(expression) + + if isinstance(df, pl.LazyFrame): + # to ensure the unnested columns are available downstream + # in a LazyFrame, a workaround is to reintroduce + # the variable_name column via with_columns + series = outcome.select(variable_name).collect() + outcome = outcome.with_columns(series) + + outcome = outcome.unnest(variable_name) + if names_transform is not None: + outcome = outcome.with_columns(names_transform) + + columns = [name for name in outcome.columns if name not in names_to] + outcome = outcome.explode(columns=columns) + return outcome + + +def _pivot_longer_names_pattern_no_dot_value( + df: pl.DataFrame | pl.LazyFrame, + index: ColumnNameOrSelector, + column_names: ColumnNameOrSelector, + names_to: list | tuple, + values_to: str, + names_pattern: str, + names_transform: pl.Expr, +) -> pl.DataFrame | pl.LazyFrame: + """ + flip polars Frame to long form, + if names_pattern and no .value in names_to. + """ + variable_name = "".join(df.columns) + outcome = df.select(pl.all().implode()) + outcome = outcome.melt( + id_vars=index, + value_vars=column_names, + variable_name=variable_name, + value_name=values_to, + ) + alias = outcome.columns + alias = "".join(alias) + alias = f"{alias}_" + expression = pl.col(variable_name) + expression = expression.str.extract_groups(pattern=names_pattern) + expression = expression.alias(alias) + outcome = outcome.with_columns(expression) + extract = outcome.select(alias, variable_name) + is_a_lazyframe = isinstance(df, pl.LazyFrame) + if is_a_lazyframe: + extract = extract.collect() + len_fields = len(extract.get_column(alias).struct.fields) len_names_to = len(names_to) if len_names_to != len_fields: @@ -250,220 +352,425 @@ def _pivot_longer_create_spec( "while the number of fields extracted is " f"{len_fields}." ) - if names_pattern is not None: - expression = pl.exclude(".name").is_null().any() - expression = pl.any_horizontal(expression) - null_check = ( - spec.unnest(columns="extract") - .filter(expression) - .get_column(".name") + expression = pl.exclude(variable_name).is_null().any() + expression = pl.any_horizontal(expression) + null_check = ( + extract.unnest(alias).filter(expression).get_column(variable_name) + ) + if null_check.len(): + column_name = null_check.gather(0).item() + raise ValueError( + f"Column label '{column_name}' " + "could not be matched with any of the groups " + "in the provided regex. Kindly provide a regular expression " + "(with the correct groups) that matches all labels in the columns." ) - if null_check.len(): - column_name = null_check.gather(0).item() - raise ValueError( - f"Column label '{column_name}' " - "could not be matched with any of the groups " - "in the provided regex. Kindly provide a regular expression " - "(with the correct groups) that matches all labels in the columns." 
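
For the regex branch, `str.extract_groups` does the heavy lifting: every capture group becomes a struct field, and a label that fails to match yields null fields, which is exactly what the null check hunts for. A small sketch (pattern and labels invented):

```python
import polars as pl

labels = pl.DataFrame({"name": ["x_1_mean", "y_2_sd"]})

# Each capture group becomes a struct field ("1", "2", "3"),
# so unnest turns one string column into three.
out = labels.with_columns(
    pl.col("name").str.extract_groups(r"(.)_(\d)_(mean|sd)")
).unnest("name")
```
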
- ) - if names_to.count(".value") < 2: - expression = pl.col("extract").struct.rename_fields(names=names_to) - spec = spec.with_columns(expression).unnest(columns="extract") - else: - spec = _squash_multiple_dot_value(spec=spec, names_to=names_to) - if ".value" not in names_to: - expression = pl.lit(value=values_to).alias(".value") - spec = spec.with_columns(expression) - spec = spec.select( - pl.col([".name", ".value"]), pl.exclude([".name", ".value"]) - ) + expression = pl.col(alias).struct.rename_fields(names=names_to) + outcome = outcome.with_columns(expression) + + outcome = outcome.select(pl.exclude(variable_name)) + if is_a_lazyframe: + series = outcome.select(alias).collect() + outcome = outcome.with_columns(series) + outcome = outcome.unnest(alias) if names_transform is not None: - spec = spec.with_columns(names_transform) - return spec + outcome = outcome.with_columns(names_transform) + + columns = [name for name in outcome.columns if name not in names_to] + outcome = outcome.explode(columns=columns) + return outcome -def _pivot_longer_dot_value( - df: pl.DataFrame | pl.LazyFrame, spec: pl.DataFrame +def _pivot_longer_names_sep_dot_value( + df: pl.DataFrame | pl.LazyFrame, + index: ColumnNameOrSelector, + column_names: ColumnNameOrSelector, + names_to: list | tuple, + names_sep: str, + names_transform: pl.Expr, ) -> pl.DataFrame | pl.LazyFrame: """ - Reshape DataFrame to long form based on metadata in `spec`. + flip polars Frame to long form, + if names_sep and .value in names_to. """ - index = [column for column in df.columns if column not in spec[".name"]] - not_dot_value = [ - column for column in spec.columns if column not in {".name", ".value"} - ] - idx = "".join(spec.columns) - if not_dot_value: - # assign a number to each group (grouped by not_dot_value) - expression = pl.first(idx).over(not_dot_value).rank("dense").sub(1) - spec = spec.with_row_index(name=idx).with_columns(expression) - else: - # use a cumulative count to properly pair the columns - # grouped by .value - expression = pl.cum_count(".value").over(".value").alias(idx) - spec = spec.with_columns(expression) - mapping = defaultdict(list) - for position, column_name, replacement_name in zip( - spec.get_column(name=idx), - spec.get_column(name=".name"), - spec.get_column(name=".value"), - ): - expression = pl.col(column_name).alias(replacement_name) - mapping[position].append(expression) - - mapping = ( - ( - [ - *index, - *columns_to_select, - ], - pl.lit(position, dtype=pl.UInt32).alias(idx), + + variable_name = "".join(df.columns) + value_name = f"{''.join(df.columns)}_" + outcome = _names_sep_reshape( + df=df, + index=index, + variable_name=variable_name, + column_names=column_names, + names_to=names_to, + value_name=value_name, + names_sep=names_sep, + names_transform=names_transform, + ) + + others = [name for name in names_to if name != ".value"] + if others: + return _pivot_longer_dot_value_others( + df=df, + outcome=outcome, + value_name=value_name, + others=others, ) - for position, columns_to_select in mapping.items() + return _pivot_longer_dot_value_only( + df=df, + outcome=outcome, + variable_name=variable_name, + value_name=value_name, ) - df = [ - df.select(columns_to_select).with_columns(position) - for columns_to_select, position in mapping - ] - # rechunking can be expensive; - # however subsequent operations are faster - # since data is contiguous in memory - df = pl.concat(df, how="diagonal_relaxed", rechunk=True) - expression = pl.cum_count(".value").over(".value").eq(1) - dot_value = 
spec.filter(expression).select(".value") - columns_to_select = [*index, *dot_value.to_series(0)] - if not_dot_value: - if isinstance(df, pl.LazyFrame): - ranges = df.select(idx).collect().get_column(idx) - else: - ranges = df.get_column(idx) - spec = spec.select(pl.struct(not_dot_value)) - _value = spec.columns[0] - expression = pl.cum_count(_value).over(_value).eq(1) - # using a gather approach, instead of a join - # offers more performance - not sure why - # maybe in the join there is another rechunking? - spec = spec.filter(expression).select(pl.col(_value).gather(ranges)) - df = df.with_columns(spec).unnest(_value) - columns_to_select.extend(not_dot_value) - return df.select(columns_to_select) - - -def _squash_multiple_dot_value( - spec: pl.DataFrame, names_to: Iterable -) -> pl.DataFrame: + + +def _pivot_longer_names_pattern_dot_value( + df: pl.DataFrame | pl.LazyFrame, + index: ColumnNameOrSelector, + column_names: ColumnNameOrSelector, + names_to: list | tuple, + names_pattern: str, + names_transform: pl.Expr, +) -> pl.DataFrame | pl.LazyFrame: """ - Combine multiple .values into a single .value column + flip polars Frame to long form, + if names_pattern and .value in names_to. """ - extract = spec.get_column("extract") - fields = extract.struct.fields - dot_value = [ - field for field, label in zip(fields, names_to) if label == ".value" - ] - dot_value = pl.concat_str(dot_value).alias(".value") - not_dot_value = [ - pl.col(field).alias(label) - for field, label in zip(fields, names_to) - if label != ".value" - ] - select_expr = [".name", dot_value] - if not_dot_value: - select_expr.extend(not_dot_value) - - return spec.unnest("extract").select(select_expr) - - -def _data_checks_pivot_longer( - df, - index, - column_names, - names_to, - values_to, - names_sep, - names_pattern, - names_transform, -) -> tuple: + + variable_name = "".join(df.columns) + value_name = f"{''.join(df.columns)}_" + outcome = _names_pattern_reshape( + df=df, + index=index, + variable_name=variable_name, + column_names=column_names, + names_to=names_to, + value_name=value_name, + names_pattern=names_pattern, + names_transform=names_transform, + ) + + others = [name for name in names_to if name != ".value"] + if others: + return _pivot_longer_dot_value_others( + df=df, + outcome=outcome, + value_name=value_name, + others=others, + ) + return _pivot_longer_dot_value_only( + df=df, + outcome=outcome, + value_name=value_name, + ) + + +def _pivot_longer_dot_value_only( + df: pl.DataFrame | pl.LazyFrame, + outcome: pl.DataFrame | pl.LazyFrame, + value_name: str, +) -> pl.DataFrame | pl.LazyFrame: """ - This function majorly does type checks on the passed arguments. 
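
The spec path being removed here paired columns with a running count over `.value`, and the same device survives in the new code, so a tiny sketch of what it computes may help (toy spec):

```python
import polars as pl

spec = pl.DataFrame(
    {
        ".name": ["ht1", "wt1", "ht2", "wt2"],
        ".value": ["ht", "wt", "ht", "wt"],
    }
)

# Running count per .value group: ht1/wt1 share a number, ht2/wt2 share
# the next - columns destined for the same output row get paired.
out = spec.with_columns(
    pl.cum_count(".value").over(".value").alias("pair")
)
```
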
+ Pivot to long form if '.value' only + """ + # for .value reshaping, each sub Frame + # should have the same columns + # the code below creates a DataFrame of unique values + # (here we use cumcount to ensure uniqueness) + alias = "".join(outcome.columns) + expression = pl.cum_count(".value").over(".value").alias(alias) + outcome = outcome.with_columns(expression) + expr1 = pl.col(".value").unique().sort().implode() + expr2 = pl.col(alias).unique().sort().implode() + uniqs = outcome.select(expr1, expr2) + uniqs = uniqs.explode(".value") + uniqs = uniqs.explode(alias) + # uniqs is then joined to `outcome` + # to ensure all groups have the labels in .value + # this may introduce nulls if not all groups + # shared the same labels in .value prior to the join - + # the null check below handles that + outcome = uniqs.join(outcome, on=uniqs.columns, how="left") + # patch to deal with nulls + expression = pl.col(value_name).is_null().any() + null_check = outcome.select(expression) + is_a_lazyframe = isinstance(df, pl.LazyFrame) + if is_a_lazyframe: + null_check = null_check.collect() + null_check = null_check.item() + if null_check: + variable_name = "".join(outcome.columns) + expr1 = pl.lit(None).alias(variable_name) + expr2 = pl.implode(variable_name) + nulls = df.with_columns(expr1).select(expr2) + if is_a_lazyframe: + nulls = nulls.collect() + nulls = nulls.to_series(0) + expression = pl.col(value_name).fill_null(nulls) + outcome = outcome.with_columns(expression) + + index = [ + label + for label in outcome.columns + if label not in {alias, value_name, ".value"} + ] + # due to the implodes, index, if present is repeated + # however, we need index to be unique, + # hence the selection of only the first entry + # from the duplicated(repeated) index values in the list + agg_ = [pl.first(index), pl.col(".value"), pl.col(value_name)] + outcome = outcome.group_by(alias, maintain_order=True).agg(agg_) + # since all groups have the same labels in '.value' + # and order is assured in the group_by operation + # we just grab only the first row + # which will serve as headers of the new columns with values + fields = outcome.select(pl.first(".value")) + if is_a_lazyframe: + fields = fields.collect() + fields = fields.item().to_list() + + outcome = outcome.select(pl.exclude(".value")) + expression = pl.col(value_name).list.to_struct( + n_field_strategy="max_width", fields=fields + ) + outcome = outcome.with_columns(expression) + if is_a_lazyframe: + # to ensure the unnested columns are available downstream + # in a LazyFrame, a workaround is to reintroduce + # the value_name column via with_columns + series = outcome.select(value_name).collect() + outcome = outcome.with_columns(series) + outcome = ( + outcome.unnest(value_name) + .explode([*index, *fields]) + .select(pl.exclude(alias)) + ) + return outcome - This function is executed before proceeding to the computation phase. - Type annotations are not provided because this function is where type - checking happens. +def _pivot_longer_dot_value_others( + df: pl.DataFrame | pl.LazyFrame, + outcome: pl.DataFrame | pl.LazyFrame, + value_name: str, + others: list, +) -> pl.DataFrame | pl.LazyFrame: """ + Pivot to long form if '.value' + and `others`. 
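
The tail end of `_pivot_longer_dot_value_only` converts each aggregated list into real columns by naming the struct fields explicitly; the move is easier to see on its own. A sketch with invented values:

```python
import polars as pl

df = pl.DataFrame({"vals": [[1.4, 5.1], [0.2, 1.8]]})

# Naming the fields makes each list element an addressable column
# once the struct is unnested.
out = df.with_columns(
    pl.col("vals").list.to_struct(
        n_field_strategy="max_width", fields=["Length", "Width"]
    )
).unnest("vals")
```
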
+ """ + # logic breakdown is similar to _pivot_longer_dot_value_only + expr1 = pl.struct(others).unique().sort().implode() + expr2 = pl.col(".value").unique().sort().implode() + uniqs = outcome.select(expr1, expr2) + uniqs = uniqs.explode(others[0]) + uniqs = uniqs.explode(".value") + uniqs = uniqs.unnest(others[0]) + + outcome = uniqs.join(outcome, on=uniqs.columns, how="left") + + expression = pl.col(value_name).is_null().any() + null_check = outcome.select(expression) + is_a_lazyframe = isinstance(df, pl.LazyFrame) + if is_a_lazyframe: + null_check = null_check.collect() + null_check = null_check.item() + if null_check: + variable_name = "".join(outcome.columns) + expr1 = pl.lit(None).alias(variable_name) + expr2 = pl.implode(variable_name) + nulls = df.with_columns(expr1).select(expr2) + if is_a_lazyframe: + nulls = nulls.collect() + nulls = nulls.to_series(0) + expression = pl.col(value_name).fill_null(nulls) + outcome = outcome.with_columns(expression) + + index = [ + label + for label in outcome.columns + if label not in {*others, value_name, ".value"} + ] + agg_ = [pl.first(index), pl.col(".value"), pl.col(value_name)] + outcome = outcome.group_by(others, maintain_order=True).agg(agg_) - def _check_type(arg_name: str, arg_value: Any): - """ - Raise if argument is not a valid type - """ - - def _check_type_single(entry): - if ( - not isinstance(entry, str) - and not cs.is_selector(entry) - and not isinstance(entry, pl.Expr) - ): - raise TypeError( - f"The argument passed to the {arg_name} parameter " - "should be a type that is supported in the polars' " - "select function." - ) + fields = outcome.select(pl.first(".value")) + if is_a_lazyframe: + fields = fields.collect() + fields = fields.item().to_list() - if isinstance(arg_value, (list, tuple)): - for entry in arg_value: - _check_type_single(entry=entry) - else: - _check_type_single(entry=arg_value) - - if (index is None) and (column_names is None): - column_names = df.columns - index = [] - elif (index is not None) and (column_names is not None): - _check_type(arg_name="index", arg_value=index) - index = df.select(index).columns - _check_type(arg_name="column_names", arg_value=column_names) - column_names = df.select(column_names).columns - - elif (index is None) and (column_names is not None): - _check_type(arg_name="column_names", arg_value=column_names) - column_names = df.select(column_names).columns - index = df.select(pl.exclude(column_names)).columns - - elif (index is not None) and (column_names is None): - _check_type(arg_name="index", arg_value=index) - index = df.select(index).columns - column_names = df.select(pl.exclude(index)).columns - - check("names_to", names_to, [list, tuple, str]) - if isinstance(names_to, (list, tuple)): - uniques = set() - for word in names_to: - check(f"'{word}' in names_to", word, [str]) - if (word in uniques) and (word != ".value"): - raise ValueError(f"'{word}' is duplicated in names_to.") - uniques.add(word) + outcome = outcome.select(pl.exclude(".value")) + expression = pl.col(value_name).list.to_struct( + n_field_strategy="max_width", fields=fields + ) - if names_sep and names_pattern: + outcome = outcome.with_columns(expression) + if is_a_lazyframe: + series = outcome.select(value_name).collect() + outcome = outcome.with_columns(series) + outcome = outcome.unnest(value_name).explode([*index, *fields]) + + return outcome + + +def _names_sep_reshape( + df: pl.DataFrame | pl.LazyFrame, + index: ColumnNameOrSelector, + column_names: ColumnNameOrSelector, + names_to: list | tuple, + 
variable_name: str, + value_name: str, + names_sep: str, + names_transform: pl.Expr, +) -> pl.DataFrame | pl.LazyFrame: + # the implode approach is used here + # for efficiency + # it is much faster to extract the relevant strings + # on a smaller set and then explode + # than to melt into the full data and then extract + outcome = ( + df.select(pl.all().implode()) + .melt( + id_vars=index, + value_vars=column_names, + variable_name=variable_name, + value_name=value_name, + ) + .with_columns( + pl.col(variable_name) + .str.split(by=names_sep) + .list.to_struct(n_field_strategy="max_width"), + ) + ) + + if isinstance(df, pl.LazyFrame): + extract = outcome.select(variable_name).collect().to_series(0) + else: + extract = outcome.get_column(variable_name) + + len_names_to = len(names_to) + + len_fields = len(extract.struct.fields) + + if len_names_to != len_fields: raise ValueError( - "Only one of names_pattern or names_sep should be provided." + f"The length of names_to does not match " + "the number of fields extracted. " + f"The length of names_to is {len_names_to} " + "while the number of fields extracted is " + f"{len_fields}." ) - if names_sep is not None: - check("names_sep", names_sep, [str]) + if names_to.count(".value") > 1: + _fields = extract.struct.fields + fields = [ + extract.struct.field(label) + for label, name in zip(_fields, names_to) + if name == ".value" + ] + _value = pl.concat_str(fields).alias(".value") + fields = [ + extract.struct.field(label).alias(name) + for label, name in zip(_fields, names_to) + if name != ".value" + ] + fields.append(_value) + extract = pl.struct(fields).alias(variable_name) + outcome = outcome.with_columns(extract) + else: + expression = pl.col(variable_name).struct.rename_fields(names=names_to) + outcome = outcome.with_columns(expression) + if isinstance(df, pl.LazyFrame): + # to ensure the unnested columns are available downstream + # in a LazyFrame, a workaround is to reintroduce + # the variable_name column via with_columns + series = outcome.select(variable_name).collect() + outcome = outcome.with_columns(series) + outcome = outcome.unnest(variable_name) + if names_transform is not None: + outcome = outcome.with_columns(names_transform) + return outcome - if names_pattern is not None: - check("names_pattern", names_pattern, [str]) - check("values_to", values_to, [str]) +def _names_pattern_reshape( + df: pl.DataFrame | pl.LazyFrame, + index: ColumnNameOrSelector, + column_names: ColumnNameOrSelector, + names_to: list | tuple, + variable_name: str, + value_name: str, + names_pattern: str, + names_transform: pl.Expr, +) -> pl.DataFrame | pl.LazyFrame: + outcome = df.select(pl.all().implode()) + outcome = outcome.melt( + id_vars=index, + value_vars=column_names, + variable_name=variable_name, + value_name=value_name, + ) + alias = outcome.columns + alias = "".join(alias) + alias = f"{alias}_" + outcome = outcome.with_columns( + pl.col(variable_name) + .str.extract_groups(pattern=names_pattern) + .alias(alias) + ) + extract = outcome.select(alias, variable_name) + is_a_lazyframe = isinstance(df, pl.LazyFrame) + if is_a_lazyframe: + extract = extract.collect() + len_fields = len(extract.get_column(alias).struct.fields) + len_names_to = len(names_to) - return ( - df, - index, - column_names, - names_to, - values_to, - names_sep, - names_pattern, - names_transform, + if len_names_to != len_fields: + raise ValueError( + f"The length of names_to does not match " + "the number of fields extracted. 
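
When `names_to` repeats `.value`, the matched fragments are concatenated back into one header, as the `pl.concat_str` branch further down does. Isolated, with invented group columns:

```python
import polars as pl

groups = pl.DataFrame({"1": ["x", "y"], "2": ["_mean", "_sd"]})

# Two '.value' captures are glued back into a single output header.
out = groups.select(pl.concat_str("1", "2").alias(".value"))
# .value -> "x_mean", "y_sd"
```
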
" + f"The length of names_to is {len_names_to} " + "while the number of fields extracted is " + f"{len_fields}." + ) + expression = pl.exclude(variable_name).is_null().any() + expression = pl.any_horizontal(expression) + null_check = ( + extract.unnest(alias).filter(expression).get_column(variable_name) ) + if null_check.len(): + column_name = null_check.gather(0).item() + raise ValueError( + f"Column label '{column_name}' " + "could not be matched with any of the groups " + "in the provided regex. Kindly provide a regular expression " + "(with the correct groups) that matches all labels in the columns." + ) + + if names_to.count(".value") > 1: + extract = extract.get_column(alias) + _fields = extract.struct.fields + fields = [ + extract.struct.field(label) + for label, name in zip(_fields, names_to) + if name == ".value" + ] + _value = pl.concat_str(fields).alias(".value") + fields = [ + extract.struct.field(label).alias(name) + for label, name in zip(_fields, names_to) + if name != ".value" + ] + fields.append(_value) + extract = pl.struct(fields).alias(alias) + outcome = outcome.with_columns(extract) + else: + expression = pl.col(alias).struct.rename_fields(names=names_to) + outcome = outcome.with_columns(expression) + + outcome = outcome.select(pl.exclude(variable_name)) + if is_a_lazyframe: + series = outcome.select(alias).collect() + outcome = outcome.with_columns(series) + outcome = outcome.unnest(alias) + if names_transform is not None: + outcome = outcome.with_columns(names_transform) + return outcome diff --git a/tests/polars/functions/test_pivot_longer_polars.py b/tests/polars/functions/test_pivot_longer_polars.py index 46bc61c12..4de18f0d8 100644 --- a/tests/polars/functions/test_pivot_longer_polars.py +++ b/tests/polars/functions/test_pivot_longer_polars.py @@ -19,25 +19,9 @@ def df_checks(): ) -def test_type_index(df_checks): - """Raise TypeError if wrong type is provided for the index.""" - msg = "The argument passed to the index parameter " - msg += "should be a type that is supported in the.+" - with pytest.raises(TypeError, match=msg): - df_checks.janitor.pivot_longer(index=2007, names_sep="_") - - -def test_type_column_names(df_checks): - """Raise TypeError if wrong type is provided for column_names.""" - msg = "The argument passed to the column_names parameter " - msg += "should be a type that is supported in the.+" - with pytest.raises(TypeError, match=msg): - df_checks.janitor.pivot_longer(column_names=2007, names_sep="_") - - def test_type_names_to(df_checks): """Raise TypeError if wrong type is provided for names_to.""" - msg = "names_to should be one of .+" + msg = "names_to should be a string, list, or tuple.+" with pytest.raises(TypeError, match=msg): df_checks.janitor.pivot_longer(names_to=2007, names_sep="_") @@ -96,38 +80,6 @@ def test_values_to_wrong_type(df_checks): df_checks.janitor.pivot_longer(values_to={"salvo"}, names_sep="_") -def test_pivot_index_only(df_checks): - """Test output if only index is passed.""" - result = df_checks.janitor.pivot_longer( - index=["famid", "birth"], - names_to="dim", - values_to="num", - ) - - actual = df_checks.melt( - id_vars=["famid", "birth"], variable_name="dim", value_name="num" - ) - - assert_frame_equal(result, actual, check_column_order=False) - - -def test_pivot_column_only(df_checks): - """Test output if only column_names is passed.""" - result = df_checks.janitor.pivot_longer( - column_names=["ht1", "ht2"], - names_to="dim", - values_to="num", - ) - - actual = df_checks.melt( - id_vars=["famid", "birth"], - 
variable_name="dim", - value_name="num", - ) - - assert_frame_equal(result, actual, check_column_order=False) - - def test_names_to_names_pattern_len(df_checks): """ " Raise ValueError @@ -167,12 +119,16 @@ def test_names_pat_str(df_checks): Test output when names_pattern is a string, and .value is present. """ - result = df_checks.janitor.pivot_longer( - column_names=cs.starts_with("ht"), - names_to=(".value", "age"), - names_pattern="(.+)(.)", - names_transform=pl.col("age").cast(pl.Int64), - ).sort(by=pl.all()) + result = ( + df_checks.janitor.pivot_longer( + index=["famid", "birth"], + names_to=(".value", "age"), + names_pattern="(.+)(.)", + names_transform=pl.col("age").cast(pl.Int64), + ) + .select("famid", "birth", "age", "ht") + .sort(by=pl.all()) + ) actual = [ {"famid": 1, "birth": 1, "age": 1, "ht": 2.8}, @@ -196,20 +152,7 @@ def test_names_pat_str(df_checks): ] actual = pl.DataFrame(actual).sort(by=pl.all()) - assert_frame_equal( - result, actual, check_dtype=False, check_column_order=False - ) - - -def test_no_column_names(df_checks): - """ - Test output if all the columns - are assigned to the index parameter. - """ - assert_frame_equal( - df_checks.janitor.pivot_longer(index=pl.all()), - df_checks, - ) + assert_frame_equal(result, actual) @pytest.fixture @@ -316,23 +259,37 @@ def test_df(): def test_names_pattern_dot_value(test_df): """Test output for names_pattern and .value.""" - result = test_df.janitor.pivot_longer( - column_names=pl.all(), - names_to=["set", ".value"], - names_pattern="(.+)_(.+)", - ).sort(by=["loc", "lat", "long"]) - assert_frame_equal(result, actual, check_column_order=False) + result = ( + test_df.janitor.pivot_longer( + column_names=cs.all(), + names_to=["set", ".value"], + names_pattern="(.+)_(.+)", + ) + .sort(by=["loc", "lat", "long"]) + .with_columns( + pl.col("lat").cast(pl.Float64), pl.col("long").cast(pl.Float64) + ) + .select("set", "loc", "lat", "long") + ) + assert_frame_equal(result, actual) def test_names_sep_dot_value(test_df): """Test output for names_pattern and .value.""" - result = test_df.janitor.pivot_longer( - column_names=pl.all(), - names_to=["set", ".value"], - names_sep="_", - ).sort(by=["loc", "lat", "long"]) - assert_frame_equal(result, actual, check_column_order=False) + result = ( + test_df.janitor.pivot_longer( + column_names=cs.all(), + names_to=["set", ".value"], + names_sep="_", + ) + .sort(by=["loc", "lat", "long"]) + .with_columns( + pl.col("lat").cast(pl.Float64), pl.col("long").cast(pl.Float64) + ) + .select("set", "loc", "lat", "long") + ) + assert_frame_equal(result, actual) @pytest.fixture @@ -394,7 +351,7 @@ def test_not_dot_value_sep2(not_dot_value): "country", variable_name="event", value_name="score" ) - assert_frame_equal(result, actual, check_column_order=False) + assert_frame_equal(result, actual) def test_not_dot_value_pattern(not_dot_value): @@ -451,6 +408,9 @@ def test_multiple_dot_value(): names_pattern=r"(x|y)_([0-9])(_mean|_sd)", names_transform=pl.col("time").cast(pl.Int64), ) + .with_columns( + pl.col("x_mean").cast(pl.Int64), pl.col("y_mean").cast(pl.Int64) + ) .select("unit", "time", "x_mean", "x_sd", "y_mean", "y_sd") .sort(by=pl.all()) ) @@ -466,7 +426,7 @@ def test_multiple_dot_value(): actual = pl.DataFrame(actual).sort(by=pl.all()) - assert_frame_equal(result, actual, check_column_order=False) + assert_frame_equal(result, actual) @pytest.fixture @@ -512,7 +472,7 @@ def test_names_pattern_single_column(single_val): "id", names_to=".value", names_pattern="(.)." 
) - assert_frame_equal(result, actual3, check_column_order=False) + assert_frame_equal(result, actual3) def test_names_pattern_single_column_not_dot_value(single_val): @@ -521,12 +481,11 @@ def test_names_pattern_single_column_not_dot_value(single_val): """ result = single_val.janitor.pivot_longer( index="id", column_names="x1", names_to="yA", names_pattern="(.+)" - ) + ).select("id", "yA", "value") assert_frame_equal( result, single_val.melt(id_vars="id", value_vars="x1", variable_name="yA"), - check_column_order=False, ) @@ -534,14 +493,15 @@ def test_names_pattern_single_column_not_dot_value1(single_val): """ Test output if names_to is not '.value'. """ - result = single_val.select("x1").janitor.pivot_longer( - names_to="yA", names_pattern="(.+)" + result = ( + single_val.select("x1") + .janitor.pivot_longer(names_to="yA", names_pattern="(.+)") + .select("yA", "value") ) assert_frame_equal( result, single_val.select("x1").melt(variable_name="yA"), - check_column_order=False, ) @@ -579,6 +539,7 @@ def test_names_pattern_nulls_in_data(df_null): names_to=[".value", "child"], names_pattern=r"(.+)_(.+)", ) + .with_columns(pl.col("gender").cast(pl.Float64)) .select("family", "child", "dob", "gender") .sort(by=pl.all()) ) @@ -598,4 +559,4 @@ def test_names_pattern_nulls_in_data(df_null): actual = pl.DataFrame(actual).sort(by=pl.all()) - assert_frame_equal(result, actual, check_column_order=False) + assert_frame_equal(result, actual) From 1568143c1ef73fbab043ee8d6e9440d2d803c34e Mon Sep 17 00:00:00 2001 From: "samuel.oranyeli" Date: Tue, 18 Jun 2024 12:53:55 +1000 Subject: [PATCH 02/21] fix docs and tests --- janitor/polars/dataframe.py | 4 +- janitor/polars/pivot_longer.py | 818 ++++++------------ .../functions/test_pivot_longer_polars.py | 14 +- 3 files changed, 272 insertions(+), 564 deletions(-) diff --git a/janitor/polars/dataframe.py b/janitor/polars/dataframe.py index 31a55e468..e3d437d8b 100644 --- a/janitor/polars/dataframe.py +++ b/janitor/polars/dataframe.py @@ -206,10 +206,10 @@ def pivot_longer( │ --- ┆ --- ┆ --- ┆ --- │ │ str ┆ str ┆ f64 ┆ f64 │ ╞═══════════╪═══════╪════════╪═══════╡ - │ setosa ┆ Sepal ┆ 5.1 ┆ 3.5 │ - │ virginica ┆ Sepal ┆ 5.9 ┆ 3.0 │ │ setosa ┆ Petal ┆ 1.4 ┆ 0.2 │ │ virginica ┆ Petal ┆ 5.1 ┆ 1.8 │ + │ setosa ┆ Sepal ┆ 5.1 ┆ 3.5 │ + │ virginica ┆ Sepal ┆ 5.9 ┆ 3.0 │ └───────────┴───────┴────────┴───────┘ Split the column labels based on regex: diff --git a/janitor/polars/pivot_longer.py b/janitor/polars/pivot_longer.py index 108670419..aaf66ff8f 100644 --- a/janitor/polars/pivot_longer.py +++ b/janitor/polars/pivot_longer.py @@ -38,7 +38,7 @@ def pivot_longer_spec( Examples: >>> import pandas as pd - >>> import janitor.polars + >>> from janitor.polars import pivot_longer_spec >>> df = pl.DataFrame( ... { ... 
"Sepal.Length": [5.1, 5.9], @@ -77,16 +77,16 @@ def pivot_longer_spec( └──────────────┴────────┴───────┘ >>> df.pipe(pivot_longer_spec,spec=spec) shape: (4, 4) - ┌───────────┬────────┬───────┬───────┐ - │ Species ┆ Length ┆ Width ┆ part │ - │ --- ┆ --- ┆ --- ┆ --- │ - │ str ┆ f64 ┆ f64 ┆ str │ - ╞═══════════╪════════╪═══════╪═══════╡ - │ setosa ┆ 5.1 ┆ 3.5 ┆ Sepal │ - │ virginica ┆ 5.9 ┆ 3.0 ┆ Sepal │ - │ setosa ┆ 1.4 ┆ 0.2 ┆ Petal │ - │ virginica ┆ 5.1 ┆ 1.8 ┆ Petal │ - └───────────┴────────┴───────┴───────┘ + ┌───────────┬───────┬────────┬───────┐ + │ Species ┆ part ┆ Length ┆ Width │ + │ --- ┆ --- ┆ --- ┆ --- │ + │ str ┆ str ┆ f64 ┆ f64 │ + ╞═══════════╪═══════╪════════╪═══════╡ + │ setosa ┆ Petal ┆ 1.4 ┆ 0.2 │ + │ virginica ┆ Petal ┆ 5.1 ┆ 1.8 │ + │ setosa ┆ Sepal ┆ 5.1 ┆ 3.5 │ + │ virginica ┆ Sepal ┆ 5.9 ┆ 3.0 │ + └───────────┴───────┴────────┴───────┘ Args: df: The source DataFrame to unpivot. @@ -133,17 +133,30 @@ def pivot_longer_spec( "Kindly ensure the spec DataFrame's columns " "are not present in the source DataFrame." ) - - df_columns = pl.DataFrame({".name": df.columns}) - - spec = df_columns.join(spec, on=".name", how="left") - spec = spec.select(pl.exclude(".name")) - if len(spec.columns) == 1: - return _pivot_longer_dot_value_only( - df=df, - outcome=spec, - ) - return + index = [ + label for label in df.columns if label not in spec.get_column(".name") + ] + others = [ + label for label in spec.columns if label not in {".name", ".value"} + ] + variable_name = "".join(df.columns + spec.columns) + variable_name = f"{variable_name}_" + if others: + dot_value_only = False + expression = pl.struct(others).alias(variable_name) + spec = spec.select(".name", ".value", expression) + else: + dot_value_only = True + expression = pl.cum_count(".value").over(".value").alias(variable_name) + spec = spec.with_columns(expression) + return _pivot_longer_dot_value( + df=df, + index=index, + spec=spec, + variable_name=variable_name, + dot_value_only=dot_value_only, + names_transform=None, + ) def _pivot_longer( @@ -168,119 +181,92 @@ def _pivot_longer( value_name=values_to, ) - if isinstance(names_to, str): - names_to = [names_to] - elif isinstance(names_to, (list, tuple)): - uniques = set() - for word in names_to: - if not isinstance(word, str): - raise TypeError( - f"'{word}' in names_to should be a string type; " - f"instead got type {type(word).__name__}" - ) - if (word in uniques) and (word != ".value"): - raise ValueError(f"'{word}' is duplicated in names_to.") - uniques.add(word) - else: - raise TypeError( - "names_to should be a string, list, or tuple; " - f"instead got type {type(names_to).__name__}" - ) - - if names_sep and names_pattern: - raise ValueError( - "Only one of names_pattern or names_sep should be provided." 
- ) - - if names_sep is not None: - check("names_sep", names_sep, [str]) - - else: - check("names_pattern", names_pattern, [str]) + ( + df, + index, + column_names, + names_to, + values_to, + names_sep, + names_pattern, + ) = _data_checks_pivot_longer( + df=df, + index=index, + column_names=column_names, + names_to=names_to, + values_to=values_to, + names_sep=names_sep, + names_pattern=names_pattern, + ) - check("values_to", values_to, [str]) + variable_name = "".join(df.columns) + variable_name = f"{variable_name}_" + spec = _pivot_longer_create_spec( + column_names=column_names, + names_to=names_to, + names_sep=names_sep, + names_pattern=names_pattern, + variable_name=variable_name, + ) - if names_sep and (".value" not in names_to): - return _pivot_longer_names_sep_no_dot_value( + if ".value" not in names_to: + return _pivot_longer_no_dot_value( df=df, index=index, + spec=spec, column_names=column_names, names_to=names_to, values_to=values_to, - names_sep=names_sep, - names_transform=names_transform, - ) - if names_pattern and (".value" not in names_to): - return _pivot_longer_names_pattern_no_dot_value( - df=df, - index=index, - column_names=column_names, - names_to=names_to, - values_to=values_to, - names_pattern=names_pattern, - names_transform=names_transform, - ) - if names_sep: - return _pivot_longer_names_sep_dot_value( - df=df, - index=index, - column_names=column_names, - names_to=names_to, - names_sep=names_sep, + variable_name=variable_name, names_transform=names_transform, ) - return _pivot_longer_names_pattern_dot_value( + + if {".name", ".value"}.symmetric_difference(spec.columns): + dot_value_only = False + else: + dot_value_only = True + expression = pl.cum_count(".value").over(".value").alias(variable_name) + spec = spec.with_columns(expression) + + return _pivot_longer_dot_value( df=df, index=index, - column_names=column_names, - names_to=names_to, - names_pattern=names_pattern, + spec=spec, + variable_name=variable_name, + dot_value_only=dot_value_only, names_transform=names_transform, ) -def _pivot_longer_names_sep_no_dot_value( - df: pl.DataFrame | pl.LazyFrame, - index: ColumnNameOrSelector, - column_names: ColumnNameOrSelector, - names_to: list | tuple, - values_to: str, - names_sep: str, - names_transform: pl.Expr, -) -> pl.DataFrame | pl.LazyFrame: +def _pivot_longer_create_spec( + column_names: list, + names_to: list, + names_sep: str | None, + names_pattern: str | None, + variable_name: str, +) -> pl.DataFrame: """ - flip polars Frame to long form, - if names_sep and no .value in names_to. + This is where the spec DataFrame is created, + before the transformation to long form. 
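
Concretely, the spec being built here is a one-row-per-source-column map; hand-writing one makes the `.name`/`.value` contract obvious (values illustrative):

```python
import polars as pl

spec = pl.DataFrame(
    {
        ".name": ["ht1", "ht2"],  # source column labels
        ".value": ["ht", "ht"],   # output column each one feeds
        "age": [1, 2],            # extra variable parsed from the label
    }
)
```
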
""" - variable_name = "".join(df.columns) - # the implode approach is used here - # for efficiency - # it is much faster to extract the relevant strings - # on a smaller set and then explode - # than to melt into the full data and then extract - outcome = ( - df.select(pl.all().implode()) - .melt( - id_vars=index, - value_vars=column_names, - variable_name=variable_name, - value_name=values_to, - ) - .with_columns( - pl.col(variable_name) + spec = pl.DataFrame({".name": column_names}) + if names_sep is not None: + expression = ( + pl.col(".name") .str.split(by=names_sep) - .list.to_struct(n_field_strategy="max_width"), + .list.to_struct(n_field_strategy="max_width") + .alias(variable_name) ) - ) - if isinstance(df, pl.LazyFrame): - extract = outcome.select(variable_name).collect().to_series(0) else: - extract = outcome.get_column(variable_name) - + expression = ( + pl.col(".name") + .str.extract_groups(pattern=names_pattern) + .alias(variable_name) + ) + spec = spec.with_columns(expression) + len_fields = len(spec.get_column(variable_name).struct.fields) len_names_to = len(names_to) - len_fields = len(extract.struct.fields) - if len_names_to != len_fields: raise ValueError( f"The length of names_to does not match " @@ -289,488 +275,220 @@ def _pivot_longer_names_sep_no_dot_value( "while the number of fields extracted is " f"{len_fields}." ) - - expression = pl.col(variable_name).struct.rename_fields(names=names_to) - outcome = outcome.with_columns(expression) - - if isinstance(df, pl.LazyFrame): - # to ensure the unnested columns are available downstream - # in a LazyFrame, a workaround is to reintroduce - # the variable_name column via with_columns - series = outcome.select(variable_name).collect() - outcome = outcome.with_columns(series) - - outcome = outcome.unnest(variable_name) - if names_transform is not None: - outcome = outcome.with_columns(names_transform) - - columns = [name for name in outcome.columns if name not in names_to] - outcome = outcome.explode(columns=columns) - return outcome + if names_pattern is not None: + expression = pl.exclude(".name").is_null().any() + expression = pl.any_horizontal(expression) + null_check = ( + spec.unnest(columns=variable_name) + .filter(expression) + .get_column(".name") + ) + if null_check.len(): + column_name = null_check.gather(0).item() + raise ValueError( + f"Column label '{column_name}' " + "could not be matched with any of the groups " + "in the provided regex. Kindly provide a regular expression " + "(with the correct groups) that matches all labels in the columns." 
+ ) + + if ".value" not in names_to: + spec = spec.get_column(variable_name) + spec = spec.struct.rename_fields(names=names_to) + return spec + if names_to.count(".value") == 1: + spec = spec.with_columns( + pl.col(variable_name).struct.rename_fields(names=names_to) + ) + if ".value" not in names_to: + return spec.get_column(variable_name) + not_dot_value = [name for name in names_to if name != ".value"] + spec = spec.unnest(variable_name) + if not_dot_value: + return spec.select( + ".name", + ".value", + pl.struct(not_dot_value).alias(variable_name), + ) + return spec.select(".name", ".value") + _spec = spec.get_column(variable_name) + _spec = _spec.struct.unnest() + fields = _spec.columns + + if len(set(names_to)) == 1: + expression = pl.concat_str(fields).alias(".value") + dot_value = _spec.select(expression) + dot_value = dot_value.to_series(0) + return spec.select(".name", dot_value) + dot_value = [ + field for field, label in zip(fields, names_to) if label == ".value" + ] + dot_value = pl.concat_str(dot_value).alias(".value") + not_dot_value = [ + pl.col(field).alias(label) + for field, label in zip(fields, names_to) + if label != ".value" + ] + not_dot_value = pl.struct(not_dot_value).alias(variable_name) + return _spec.select(spec.get_column(".name"), not_dot_value, dot_value) -def _pivot_longer_names_pattern_no_dot_value( +def _pivot_longer_no_dot_value( df: pl.DataFrame | pl.LazyFrame, + spec: pl.DataFrame, index: ColumnNameOrSelector, column_names: ColumnNameOrSelector, names_to: list | tuple, values_to: str, - names_pattern: str, + variable_name: str, names_transform: pl.Expr, ) -> pl.DataFrame | pl.LazyFrame: """ flip polars Frame to long form, - if names_pattern and no .value in names_to. + if no .value in names_to. """ - variable_name = "".join(df.columns) - outcome = df.select(pl.all().implode()) - outcome = outcome.melt( - id_vars=index, - value_vars=column_names, - variable_name=variable_name, - value_name=values_to, - ) - alias = outcome.columns - alias = "".join(alias) - alias = f"{alias}_" - expression = pl.col(variable_name) - expression = expression.str.extract_groups(pattern=names_pattern) - expression = expression.alias(alias) - outcome = outcome.with_columns(expression) - extract = outcome.select(alias, variable_name) - is_a_lazyframe = isinstance(df, pl.LazyFrame) - if is_a_lazyframe: - extract = extract.collect() - len_fields = len(extract.get_column(alias).struct.fields) - len_names_to = len(names_to) - - if len_names_to != len_fields: - raise ValueError( - f"The length of names_to does not match " - "the number of fields extracted. " - f"The length of names_to is {len_names_to} " - "while the number of fields extracted is " - f"{len_fields}." + # the implode/explode approach is used here + # for efficiency + # do the operation on a smaller size + # and then blow it up after + # it is usually much faster + # than running on the actual data + outcome = ( + df.select(pl.all().implode()) + .melt( + id_vars=index, + value_vars=column_names, + variable_name=variable_name, + value_name=values_to, ) - expression = pl.exclude(variable_name).is_null().any() - expression = pl.any_horizontal(expression) - null_check = ( - extract.unnest(alias).filter(expression).get_column(variable_name) + .with_columns(spec) ) - if null_check.len(): - column_name = null_check.gather(0).item() - raise ValueError( - f"Column label '{column_name}' " - "could not be matched with any of the groups " - "in the provided regex. 
Kindly provide a regular expression " - "(with the correct groups) that matches all labels in the columns." - ) - - expression = pl.col(alias).struct.rename_fields(names=names_to) - outcome = outcome.with_columns(expression) - outcome = outcome.select(pl.exclude(variable_name)) - if is_a_lazyframe: - series = outcome.select(alias).collect() - outcome = outcome.with_columns(series) - outcome = outcome.unnest(alias) + outcome = outcome.unnest(variable_name) if names_transform is not None: outcome = outcome.with_columns(names_transform) - columns = [name for name in outcome.columns if name not in names_to] outcome = outcome.explode(columns=columns) return outcome -def _pivot_longer_names_sep_dot_value( +def _pivot_longer_dot_value( df: pl.DataFrame | pl.LazyFrame, + spec: pl.DataFrame, index: ColumnNameOrSelector, - column_names: ColumnNameOrSelector, - names_to: list | tuple, - names_sep: str, + variable_name: str, + dot_value_only: bool, names_transform: pl.Expr, ) -> pl.DataFrame | pl.LazyFrame: """ flip polars Frame to long form, if names_sep and .value in names_to. """ - - variable_name = "".join(df.columns) - value_name = f"{''.join(df.columns)}_" - outcome = _names_sep_reshape( - df=df, - index=index, - variable_name=variable_name, - column_names=column_names, - names_to=names_to, - value_name=value_name, - names_sep=names_sep, - names_transform=names_transform, - ) - - others = [name for name in names_to if name != ".value"] - if others: - return _pivot_longer_dot_value_others( - df=df, - outcome=outcome, - value_name=value_name, - others=others, - ) - return _pivot_longer_dot_value_only( - df=df, - outcome=outcome, - variable_name=variable_name, - value_name=value_name, - ) - - -def _pivot_longer_names_pattern_dot_value( - df: pl.DataFrame | pl.LazyFrame, - index: ColumnNameOrSelector, - column_names: ColumnNameOrSelector, - names_to: list | tuple, - names_pattern: str, - names_transform: pl.Expr, -) -> pl.DataFrame | pl.LazyFrame: - """ - flip polars Frame to long form, - if names_pattern and .value in names_to. 
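
In `_pivot_longer_no_dot_value`, the spec arrives as a precomputed struct Series that simply overwrites the melted variable column. A self-contained sketch of that attachment, with an invented two-field spec, assuming `melt` preserves the column order used to build it:

```python
import polars as pl

df = pl.DataFrame({"id": [1], "x_1": [5.0], "x_2": [6.0]})

# Precomputed struct series standing in for the melted labels.
spec = pl.Series(
    "variable", [{"var": "x", "num": "1"}, {"var": "x", "num": "2"}]
)

out = (
    df.select(pl.all().implode())
    .melt(id_vars="id", variable_name="variable", value_name="value")
    .with_columns(spec)      # replace raw labels with the parsed struct
    .unnest("variable")
    .explode("id", "value")
)
```
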
- """ - - variable_name = "".join(df.columns) - value_name = f"{''.join(df.columns)}_" - outcome = _names_pattern_reshape( - df=df, - index=index, - variable_name=variable_name, - column_names=column_names, - names_to=names_to, - value_name=value_name, - names_pattern=names_pattern, - names_transform=names_transform, - ) - - others = [name for name in names_to if name != ".value"] - if others: - return _pivot_longer_dot_value_others( - df=df, - outcome=outcome, - value_name=value_name, - others=others, - ) - return _pivot_longer_dot_value_only( - df=df, - outcome=outcome, - value_name=value_name, - ) - - -def _pivot_longer_dot_value_only( - df: pl.DataFrame | pl.LazyFrame, - outcome: pl.DataFrame | pl.LazyFrame, - value_name: str, -) -> pl.DataFrame | pl.LazyFrame: - """ - Pivot to long form if '.value' only - """ - # for .value reshaping, each sub Frame - # should have the same columns - # the code below creates a DataFrame of unique values - # (here we use cumcount to ensure uniqueness) - alias = "".join(outcome.columns) - expression = pl.cum_count(".value").over(".value").alias(alias) - outcome = outcome.with_columns(expression) - expr1 = pl.col(".value").unique().sort().implode() - expr2 = pl.col(alias).unique().sort().implode() - uniqs = outcome.select(expr1, expr2) - uniqs = uniqs.explode(".value") - uniqs = uniqs.explode(alias) - # uniqs is then joined to `outcome` - # to ensure all groups have the labels in .value - # this may introduce nulls if not all groups - # shared the same labels in .value prior to the join - - # the null check below handles that - outcome = uniqs.join(outcome, on=uniqs.columns, how="left") - # patch to deal with nulls - expression = pl.col(value_name).is_null().any() - null_check = outcome.select(expression) - is_a_lazyframe = isinstance(df, pl.LazyFrame) - if is_a_lazyframe: - null_check = null_check.collect() - null_check = null_check.item() - if null_check: - variable_name = "".join(outcome.columns) - expr1 = pl.lit(None).alias(variable_name) - expr2 = pl.implode(variable_name) - nulls = df.with_columns(expr1).select(expr2) - if is_a_lazyframe: - nulls = nulls.collect() - nulls = nulls.to_series(0) - expression = pl.col(value_name).fill_null(nulls) - outcome = outcome.with_columns(expression) - - index = [ - label - for label in outcome.columns - if label not in {alias, value_name, ".value"} - ] - # due to the implodes, index, if present is repeated - # however, we need index to be unique, - # hence the selection of only the first entry - # from the duplicated(repeated) index values in the list - agg_ = [pl.first(index), pl.col(".value"), pl.col(value_name)] - outcome = outcome.group_by(alias, maintain_order=True).agg(agg_) - # since all groups have the same labels in '.value' - # and order is assured in the group_by operation - # we just grab only the first row - # which will serve as headers of the new columns with values - fields = outcome.select(pl.first(".value")) - if is_a_lazyframe: - fields = fields.collect() - fields = fields.item().to_list() - - outcome = outcome.select(pl.exclude(".value")) - expression = pl.col(value_name).list.to_struct( - n_field_strategy="max_width", fields=fields - ) - outcome = outcome.with_columns(expression) - if is_a_lazyframe: - # to ensure the unnested columns are available downstream - # in a LazyFrame, a workaround is to reintroduce - # the value_name column via with_columns - series = outcome.select(value_name).collect() - outcome = outcome.with_columns(series) + spec = spec.group_by(variable_name) + spec = 
spec.agg(pl.all()) + expressions = [] + for names, fields in zip( + spec.get_column(".name").to_list(), + spec.get_column(".value").to_list(), + ): + expression = pl.struct(names).struct.rename_fields(names=fields) + expressions.append(expression) + expressions = [*index, *expressions] + spec = spec.get_column(variable_name) outcome = ( - outcome.unnest(value_name) - .explode([*index, *fields]) - .select(pl.exclude(alias)) + df.select(expressions) + .select(pl.all().implode()) + .melt(id_vars=index, variable_name=variable_name, value_name=".value") + .with_columns(spec) ) - return outcome - -def _pivot_longer_dot_value_others( - df: pl.DataFrame | pl.LazyFrame, - outcome: pl.DataFrame | pl.LazyFrame, - value_name: str, - others: list, -) -> pl.DataFrame | pl.LazyFrame: - """ - Pivot to long form if '.value' - and `others`. - """ - # logic breakdown is similar to _pivot_longer_dot_value_only - expr1 = pl.struct(others).unique().sort().implode() - expr2 = pl.col(".value").unique().sort().implode() - uniqs = outcome.select(expr1, expr2) - uniqs = uniqs.explode(others[0]) - uniqs = uniqs.explode(".value") - uniqs = uniqs.unnest(others[0]) - - outcome = uniqs.join(outcome, on=uniqs.columns, how="left") - - expression = pl.col(value_name).is_null().any() - null_check = outcome.select(expression) - is_a_lazyframe = isinstance(df, pl.LazyFrame) - if is_a_lazyframe: - null_check = null_check.collect() - null_check = null_check.item() - if null_check: - variable_name = "".join(outcome.columns) - expr1 = pl.lit(None).alias(variable_name) - expr2 = pl.implode(variable_name) - nulls = df.with_columns(expr1).select(expr2) - if is_a_lazyframe: - nulls = nulls.collect() - nulls = nulls.to_series(0) - expression = pl.col(value_name).fill_null(nulls) - outcome = outcome.with_columns(expression) - - index = [ - label - for label in outcome.columns - if label not in {*others, value_name, ".value"} + if dot_value_only: + columns = [ + label for label in outcome.columns if label != variable_name + ] + outcome = outcome.explode(columns).unnest(".value") + outcome = outcome.select(pl.exclude(variable_name)) + return outcome + outcome = outcome.unnest(variable_name) + if names_transform is not None: + outcome = outcome.with_columns(names_transform) + columns = [ + label for label in outcome.columns if label not in spec.struct.fields ] - agg_ = [pl.first(index), pl.col(".value"), pl.col(value_name)] - outcome = outcome.group_by(others, maintain_order=True).agg(agg_) - - fields = outcome.select(pl.first(".value")) - if is_a_lazyframe: - fields = fields.collect() - fields = fields.item().to_list() - - outcome = outcome.select(pl.exclude(".value")) - expression = pl.col(value_name).list.to_struct( - n_field_strategy="max_width", fields=fields - ) - - outcome = outcome.with_columns(expression) - if is_a_lazyframe: - series = outcome.select(value_name).collect() - outcome = outcome.with_columns(series) - outcome = outcome.unnest(value_name).explode([*index, *fields]) + outcome = outcome.explode(columns) + outcome = outcome.unnest(".value") return outcome -def _names_sep_reshape( - df: pl.DataFrame | pl.LazyFrame, - index: ColumnNameOrSelector, - column_names: ColumnNameOrSelector, - names_to: list | tuple, - variable_name: str, - value_name: str, - names_sep: str, - names_transform: pl.Expr, -) -> pl.DataFrame | pl.LazyFrame: - # the implode approach is used here - # for efficiency - # it is much faster to extract the relevant strings - # on a smaller set and then explode - # than to melt into the full data and then 
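
The zip-and-rename loop here gives every `.value` group identically named struct fields, so the groups become union-compatible and can be melted into a single column. The core move, isolated on toy columns:

```python
import polars as pl

df = pl.DataFrame({"x_1": [1.0], "y_1": [2.0], "x_2": [3.0], "y_2": [4.0]})

# Same field names in every group -> the structs share one dtype
# and can be melted into one '.value' column, then unnested.
grp1 = pl.struct("x_1", "y_1").struct.rename_fields(["x", "y"]).alias("1")
grp2 = pl.struct("x_2", "y_2").struct.rename_fields(["x", "y"]).alias("2")
out = df.select(grp1, grp2).melt(value_name=".value").unnest(".value")
```
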
extract - outcome = ( - df.select(pl.all().implode()) - .melt( - id_vars=index, - value_vars=column_names, - variable_name=variable_name, - value_name=value_name, - ) - .with_columns( - pl.col(variable_name) - .str.split(by=names_sep) - .list.to_struct(n_field_strategy="max_width"), - ) - ) - - if isinstance(df, pl.LazyFrame): - extract = outcome.select(variable_name).collect().to_series(0) - else: - extract = outcome.get_column(variable_name) +def _data_checks_pivot_longer( + df, + index, + column_names, + names_to, + values_to, + names_sep, + names_pattern, +) -> tuple: + """ + This function majorly does type checks on the passed arguments. - len_names_to = len(names_to) + This function is executed before proceeding to the computation phase. - len_fields = len(extract.struct.fields) + Type annotations are not provided because this function is where type + checking happens. + """ + if isinstance(names_to, str): + names_to = [names_to] + elif isinstance(names_to, (list, tuple)): + uniques = set() + for word in names_to: + if not isinstance(word, str): + raise TypeError( + f"'{word}' in names_to should be a string type; " + f"instead got type {type(word).__name__}" + ) + if (word in uniques) and (word != ".value"): + raise ValueError(f"'{word}' is duplicated in names_to.") + uniques.add(word) + else: + raise TypeError( + "names_to should be a string, list, or tuple; " + f"instead got type {type(names_to).__name__}" + ) - if len_names_to != len_fields: + if names_sep and names_pattern: raise ValueError( - f"The length of names_to does not match " - "the number of fields extracted. " - f"The length of names_to is {len_names_to} " - "while the number of fields extracted is " - f"{len_fields}." + "Only one of names_pattern or names_sep should be provided." 
) - if names_to.count(".value") > 1: - _fields = extract.struct.fields - fields = [ - extract.struct.field(label) - for label, name in zip(_fields, names_to) - if name == ".value" - ] - _value = pl.concat_str(fields).alias(".value") - fields = [ - extract.struct.field(label).alias(name) - for label, name in zip(_fields, names_to) - if name != ".value" - ] - fields.append(_value) - extract = pl.struct(fields).alias(variable_name) - outcome = outcome.with_columns(extract) - else: - expression = pl.col(variable_name).struct.rename_fields(names=names_to) - outcome = outcome.with_columns(expression) - if isinstance(df, pl.LazyFrame): - # to ensure the unnested columns are available downstream - # in a LazyFrame, a workaround is to reintroduce - # the variable_name column via with_columns - series = outcome.select(variable_name).collect() - outcome = outcome.with_columns(series) - outcome = outcome.unnest(variable_name) - if names_transform is not None: - outcome = outcome.with_columns(names_transform) - return outcome - + if names_sep is not None: + check("names_sep", names_sep, [str]) -def _names_pattern_reshape( - df: pl.DataFrame | pl.LazyFrame, - index: ColumnNameOrSelector, - column_names: ColumnNameOrSelector, - names_to: list | tuple, - variable_name: str, - value_name: str, - names_pattern: str, - names_transform: pl.Expr, -) -> pl.DataFrame | pl.LazyFrame: - outcome = df.select(pl.all().implode()) - outcome = outcome.melt( - id_vars=index, - value_vars=column_names, - variable_name=variable_name, - value_name=value_name, - ) - alias = outcome.columns - alias = "".join(alias) - alias = f"{alias}_" - outcome = outcome.with_columns( - pl.col(variable_name) - .str.extract_groups(pattern=names_pattern) - .alias(alias) - ) - extract = outcome.select(alias, variable_name) - is_a_lazyframe = isinstance(df, pl.LazyFrame) - if is_a_lazyframe: - extract = extract.collect() - len_fields = len(extract.get_column(alias).struct.fields) - len_names_to = len(names_to) + else: + check("names_pattern", names_pattern, [str]) - if len_names_to != len_fields: - raise ValueError( - f"The length of names_to does not match " - "the number of fields extracted. " - f"The length of names_to is {len_names_to} " - "while the number of fields extracted is " - f"{len_fields}." - ) - expression = pl.exclude(variable_name).is_null().any() - expression = pl.any_horizontal(expression) - null_check = ( - extract.unnest(alias).filter(expression).get_column(variable_name) - ) - if null_check.len(): - column_name = null_check.gather(0).item() - raise ValueError( - f"Column label '{column_name}' " - "could not be matched with any of the groups " - "in the provided regex. Kindly provide a regular expression " - "(with the correct groups) that matches all labels in the columns." 
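A hypothetical illustration (not taken from the test suite) of what the names_pattern guard is protecting against: str.extract_groups yields a struct of nulls for any label the regex cannot match, and those nulls are what the null check surfaces as an error.

    import polars as pl

    frame = pl.DataFrame({"name": ["x_1", "y_2", "oops"]})
    extracted = frame.with_columns(
        pl.col("name").str.extract_groups(r"(.+)_(.+)").alias("extract")
    )
    # 'oops' produces {"1": null, "2": null}; filtering on a null field
    # recovers the offending label for the error message
    bad = extracted.unnest("extract").filter(pl.col("1").is_null())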
- ) + check("values_to", values_to, [str]) - if names_to.count(".value") > 1: - extract = extract.get_column(alias) - _fields = extract.struct.fields - fields = [ - extract.struct.field(label) - for label, name in zip(_fields, names_to) - if name == ".value" - ] - _value = pl.concat_str(fields).alias(".value") - fields = [ - extract.struct.field(label).alias(name) - for label, name in zip(_fields, names_to) - if name != ".value" - ] - fields.append(_value) - extract = pl.struct(fields).alias(alias) - outcome = outcome.with_columns(extract) + if (index is None) and (column_names is None): + column_names = df.columns + index = [] + elif (index is None) and (column_names is not None): + column_names = df.select(column_names).columns + index = df.select(pl.exclude(column_names)).columns + elif (index is not None) and (column_names is None): + index = df.select(index).columns + column_names = df.select(pl.exclude(index)).columns else: - expression = pl.col(alias).struct.rename_fields(names=names_to) - outcome = outcome.with_columns(expression) - - outcome = outcome.select(pl.exclude(variable_name)) - if is_a_lazyframe: - series = outcome.select(alias).collect() - outcome = outcome.with_columns(series) - outcome = outcome.unnest(alias) - if names_transform is not None: - outcome = outcome.with_columns(names_transform) - return outcome + index = df.select(index).columns + column_names = df.select(column_names).columns + + return ( + df, + index, + column_names, + names_to, + values_to, + names_sep, + names_pattern, + ) diff --git a/tests/polars/functions/test_pivot_longer_polars.py b/tests/polars/functions/test_pivot_longer_polars.py index 4de18f0d8..d062aac12 100644 --- a/tests/polars/functions/test_pivot_longer_polars.py +++ b/tests/polars/functions/test_pivot_longer_polars.py @@ -266,9 +266,6 @@ def test_names_pattern_dot_value(test_df): names_pattern="(.+)_(.+)", ) .sort(by=["loc", "lat", "long"]) - .with_columns( - pl.col("lat").cast(pl.Float64), pl.col("long").cast(pl.Float64) - ) .select("set", "loc", "lat", "long") ) assert_frame_equal(result, actual) @@ -284,9 +281,6 @@ def test_names_sep_dot_value(test_df): names_sep="_", ) .sort(by=["loc", "lat", "long"]) - .with_columns( - pl.col("lat").cast(pl.Float64), pl.col("long").cast(pl.Float64) - ) .select("set", "loc", "lat", "long") ) assert_frame_equal(result, actual) @@ -408,9 +402,6 @@ def test_multiple_dot_value(): names_pattern=r"(x|y)_([0-9])(_mean|_sd)", names_transform=pl.col("time").cast(pl.Int64), ) - .with_columns( - pl.col("x_mean").cast(pl.Int64), pl.col("y_mean").cast(pl.Int64) - ) .select("unit", "time", "x_mean", "x_sd", "y_mean", "y_sd") .sort(by=pl.all()) ) @@ -448,7 +439,7 @@ def test_multiple_dot_value2(single_val): index="id", names_to=(".value", ".value"), names_pattern="(.)(.)" ) - assert_frame_equal(result, single_val, check_column_order=False) + assert_frame_equal(result, single_val) actual3 = [ @@ -472,7 +463,7 @@ def test_names_pattern_single_column(single_val): "id", names_to=".value", names_pattern="(.)." 
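To make the '.value'-only case concrete (hypothetical data, not the fixture used in these tests): with names_pattern="(.).", only the first character of each label survives as a column name, and no name column is kept.

    import polars as pl

    df = pl.DataFrame({"id": [1, 2], "x1": [3, 4], "x2": [5, 6]})
    # 'x1' and 'x2' both map to '.value' == 'x', so their values are
    # stacked under a single 'x' column:
    #
    #   id  x
    #    1  3
    #    2  4
    #    1  5
    #    2  6

The order in which the stacked values arrive is an implementation detail, which is why the assertions below now sort both frames before comparing.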
) - assert_frame_equal(result, actual3) + assert_frame_equal(result.sort(by=pl.all()), actual3.sort(by=pl.all())) def test_names_pattern_single_column_not_dot_value(single_val): @@ -539,7 +530,6 @@ def test_names_pattern_nulls_in_data(df_null): names_to=[".value", "child"], names_pattern=r"(.+)_(.+)", ) - .with_columns(pl.col("gender").cast(pl.Float64)) .select("family", "child", "dob", "gender") .sort(by=pl.all()) ) From b5a89a911229053ae8b58ee0cbc3c48a7c7fca24 Mon Sep 17 00:00:00 2001 From: "samuel.oranyeli" Date: Tue, 18 Jun 2024 13:11:30 +1000 Subject: [PATCH 03/21] fix docs and tests --- janitor/polars/dataframe.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/janitor/polars/dataframe.py b/janitor/polars/dataframe.py index e3d437d8b..ca0feb8ca 100644 --- a/janitor/polars/dataframe.py +++ b/janitor/polars/dataframe.py @@ -177,21 +177,21 @@ def pivot_longer( ... index = 'Species', ... names_to = ('part', 'dimension'), ... names_sep = '.', - ... ).select('Species','part','dimension','value') + ... ).select('Species','part','dimension','value').sort(by=pl.all()) shape: (8, 4) ┌───────────┬───────┬───────────┬───────┐ │ Species ┆ part ┆ dimension ┆ value │ │ --- ┆ --- ┆ --- ┆ --- │ │ str ┆ str ┆ str ┆ f64 │ ╞═══════════╪═══════╪═══════════╪═══════╡ + │ setosa ┆ Petal ┆ Length ┆ 1.4 │ + │ setosa ┆ Petal ┆ Width ┆ 0.2 │ │ setosa ┆ Sepal ┆ Length ┆ 5.1 │ - │ virginica ┆ Sepal ┆ Length ┆ 5.9 │ │ setosa ┆ Sepal ┆ Width ┆ 3.5 │ - │ virginica ┆ Sepal ┆ Width ┆ 3.0 │ - │ setosa ┆ Petal ┆ Length ┆ 1.4 │ │ virginica ┆ Petal ┆ Length ┆ 5.1 │ - │ setosa ┆ Petal ┆ Width ┆ 0.2 │ │ virginica ┆ Petal ┆ Width ┆ 1.8 │ + │ virginica ┆ Sepal ┆ Length ┆ 5.9 │ + │ virginica ┆ Sepal ┆ Width ┆ 3.0 │ └───────────┴───────┴───────────┴───────┘ Retain parts of the column names as headers: @@ -199,7 +199,7 @@ def pivot_longer( ... index = 'Species', ... names_to = ('part', '.value'), ... names_sep = '.', - ... ).select('Species','part','Length','Width') + ... 
).select('Species','part','Length','Width').sort(by=pl.all()) shape: (4, 4) ┌───────────┬───────┬────────┬───────┐ │ Species ┆ part ┆ Length ┆ Width │ @@ -207,8 +207,8 @@ def pivot_longer( │ str ┆ str ┆ f64 ┆ f64 │ ╞═══════════╪═══════╪════════╪═══════╡ │ setosa ┆ Petal ┆ 1.4 ┆ 0.2 │ - │ virginica ┆ Petal ┆ 5.1 ┆ 1.8 │ │ setosa ┆ Sepal ┆ 5.1 ┆ 3.5 │ + │ virginica ┆ Petal ┆ 5.1 ┆ 1.8 │ │ virginica ┆ Sepal ┆ 5.9 ┆ 3.0 │ └───────────┴───────┴────────┴───────┘ From 527893630cfcb61e21a4647bfe3598d2c3f35d4a Mon Sep 17 00:00:00 2001 From: "samuel.oranyeli" Date: Thu, 20 Jun 2024 22:49:26 +1000 Subject: [PATCH 04/21] fix doc --- janitor/polars/dataframe.py | 10 +++++----- janitor/polars/lazyframe.py | 10 +++++----- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/janitor/polars/dataframe.py b/janitor/polars/dataframe.py index ca0feb8ca..d1423a2b8 100644 --- a/janitor/polars/dataframe.py +++ b/janitor/polars/dataframe.py @@ -155,21 +155,21 @@ def pivot_longer( └──────────────┴─────────────┴──────────────┴─────────────┴───────────┘ Replicate polars' [melt](https://docs.pola.rs/py-polars/html/reference/dataframe/api/polars.DataFrame.melt.html#polars-dataframe-melt): - >>> df.janitor.pivot_longer(index = 'Species') + >>> df.janitor.pivot_longer(index = 'Species').sort(by=pl.all()) shape: (8, 3) ┌───────────┬──────────────┬───────┐ │ Species ┆ variable ┆ value │ │ --- ┆ --- ┆ --- │ │ str ┆ str ┆ f64 │ ╞═══════════╪══════════════╪═══════╡ + │ setosa ┆ Petal.Length ┆ 1.4 │ + │ setosa ┆ Petal.Width ┆ 0.2 │ │ setosa ┆ Sepal.Length ┆ 5.1 │ - │ virginica ┆ Sepal.Length ┆ 5.9 │ │ setosa ┆ Sepal.Width ┆ 3.5 │ - │ virginica ┆ Sepal.Width ┆ 3.0 │ - │ setosa ┆ Petal.Length ┆ 1.4 │ │ virginica ┆ Petal.Length ┆ 5.1 │ - │ setosa ┆ Petal.Width ┆ 0.2 │ │ virginica ┆ Petal.Width ┆ 1.8 │ + │ virginica ┆ Sepal.Length ┆ 5.9 │ + │ virginica ┆ Sepal.Width ┆ 3.0 │ └───────────┴──────────────┴───────┘ Split the column labels into individual columns: diff --git a/janitor/polars/lazyframe.py b/janitor/polars/lazyframe.py index f059ab1f5..cd20b2f5a 100644 --- a/janitor/polars/lazyframe.py +++ b/janitor/polars/lazyframe.py @@ -154,21 +154,21 @@ def pivot_longer( │ 5.9 ┆ 3.0 ┆ 5.1 ┆ 1.8 ┆ virginica │ └──────────────┴─────────────┴──────────────┴─────────────┴───────────┘ - >>> df.janitor.pivot_longer(index = 'Species').collect() + >>> df.janitor.pivot_longer(index = 'Species').sort(by=pl.all()).collect() shape: (8, 3) ┌───────────┬──────────────┬───────┐ │ Species ┆ variable ┆ value │ │ --- ┆ --- ┆ --- │ │ str ┆ str ┆ f64 │ ╞═══════════╪══════════════╪═══════╡ + │ setosa ┆ Petal.Length ┆ 1.4 │ + │ setosa ┆ Petal.Width ┆ 0.2 │ │ setosa ┆ Sepal.Length ┆ 5.1 │ - │ virginica ┆ Sepal.Length ┆ 5.9 │ │ setosa ┆ Sepal.Width ┆ 3.5 │ - │ virginica ┆ Sepal.Width ┆ 3.0 │ - │ setosa ┆ Petal.Length ┆ 1.4 │ │ virginica ┆ Petal.Length ┆ 5.1 │ - │ setosa ┆ Petal.Width ┆ 0.2 │ │ virginica ┆ Petal.Width ┆ 1.8 │ + │ virginica ┆ Sepal.Length ┆ 5.9 │ + │ virginica ┆ Sepal.Width ┆ 3.0 │ └───────────┴──────────────┴───────┘ !!! 
info "New in version 0.28.0" From e8c3057ac526182eeda0ff4f85d81c42ed3dfebe Mon Sep 17 00:00:00 2001 From: "samuel.oranyeli" Date: Thu, 20 Jun 2024 22:57:11 +1000 Subject: [PATCH 05/21] fix doc pivot_longer_spec --- janitor/polars/pivot_longer.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/janitor/polars/pivot_longer.py b/janitor/polars/pivot_longer.py index aaf66ff8f..49da64ea2 100644 --- a/janitor/polars/pivot_longer.py +++ b/janitor/polars/pivot_longer.py @@ -75,7 +75,7 @@ def pivot_longer_spec( │ Sepal.Width ┆ Width ┆ Sepal │ │ Petal.Width ┆ Width ┆ Petal │ └──────────────┴────────┴───────┘ - >>> df.pipe(pivot_longer_spec,spec=spec) + >>> df.pipe(pivot_longer_spec,spec=spec).sort*by=pl.all()) shape: (4, 4) ┌───────────┬───────┬────────┬───────┐ │ Species ┆ part ┆ Length ┆ Width │ @@ -83,8 +83,8 @@ def pivot_longer_spec( │ str ┆ str ┆ f64 ┆ f64 │ ╞═══════════╪═══════╪════════╪═══════╡ │ setosa ┆ Petal ┆ 1.4 ┆ 0.2 │ - │ virginica ┆ Petal ┆ 5.1 ┆ 1.8 │ │ setosa ┆ Sepal ┆ 5.1 ┆ 3.5 │ + │ virginica ┆ Petal ┆ 5.1 ┆ 1.8 │ │ virginica ┆ Sepal ┆ 5.9 ┆ 3.0 │ └───────────┴───────┴────────┴───────┘ From 7c497cd3e97628574ffa4fad60501e9dc46bdf98 Mon Sep 17 00:00:00 2001 From: "samuel.oranyeli" Date: Thu, 20 Jun 2024 23:02:10 +1000 Subject: [PATCH 06/21] fix doc pivot_longer_spec --- janitor/polars/pivot_longer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/janitor/polars/pivot_longer.py b/janitor/polars/pivot_longer.py index 49da64ea2..224604131 100644 --- a/janitor/polars/pivot_longer.py +++ b/janitor/polars/pivot_longer.py @@ -75,7 +75,7 @@ def pivot_longer_spec( │ Sepal.Width ┆ Width ┆ Sepal │ │ Petal.Width ┆ Width ┆ Petal │ └──────────────┴────────┴───────┘ - >>> df.pipe(pivot_longer_spec,spec=spec).sort*by=pl.all()) + >>> df.pipe(pivot_longer_spec,spec=spec).sort(by=pl.all()) shape: (4, 4) ┌───────────┬───────┬────────┬───────┐ │ Species ┆ part ┆ Length ┆ Width │ From 513fe73067c5e76b08e28a4d6e6b0679804ed0b6 Mon Sep 17 00:00:00 2001 From: "samuel.oranyeli" Date: Fri, 21 Jun 2024 09:10:29 +1000 Subject: [PATCH 07/21] updates --- janitor/polars/pivot_longer.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/janitor/polars/pivot_longer.py b/janitor/polars/pivot_longer.py index c1edf235f..b25974eae 100644 --- a/janitor/polars/pivot_longer.py +++ b/janitor/polars/pivot_longer.py @@ -217,7 +217,7 @@ def pivot_longer( └──────────────┴─────────────┴──────────────┴─────────────┴───────────┘ Replicate polars' [melt](https://docs.pola.rs/py-polars/html/reference/dataframe/api/polars.DataFrame.melt.html#polars-dataframe-melt): - >>> df.pivot_longer(index = 'Species') + >>> df.janitor.pivot_longer(index = 'Species').sort(by=pl.all()) shape: (8, 3) ┌───────────┬──────────────┬───────┐ │ Species ┆ variable ┆ value │ @@ -239,7 +239,7 @@ def pivot_longer( ... index = 'Species', ... names_to = ('part', 'dimension'), ... names_sep = '.', - ... ).select('Species','part','dimension','value') + ... ).select('Species','part','dimension','value').sort(by=pl.all()) shape: (8, 4) ┌───────────┬───────┬───────────┬───────┐ │ Species ┆ part ┆ dimension ┆ value │ @@ -261,7 +261,7 @@ def pivot_longer( ... index = 'Species', ... names_to = ('part', '.value'), ... names_sep = '.', - ... ).select('Species','part','Length','Width') + ... 
).select('Species','part','Length','Width').sort(by=pl.all()) shape: (4, 4) ┌───────────┬───────┬────────┬───────┐ │ Species ┆ part ┆ Length ┆ Width │ From 23994846eafcd666f7dc0fd17c2a679a5ad893fc Mon Sep 17 00:00:00 2001 From: "samuel.oranyeli" Date: Fri, 21 Jun 2024 09:14:49 +1000 Subject: [PATCH 08/21] updates --- janitor/polars/pivot_longer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/janitor/polars/pivot_longer.py b/janitor/polars/pivot_longer.py index b25974eae..c11b71005 100644 --- a/janitor/polars/pivot_longer.py +++ b/janitor/polars/pivot_longer.py @@ -217,7 +217,7 @@ def pivot_longer( └──────────────┴─────────────┴──────────────┴─────────────┴───────────┘ Replicate polars' [melt](https://docs.pola.rs/py-polars/html/reference/dataframe/api/polars.DataFrame.melt.html#polars-dataframe-melt): - >>> df.janitor.pivot_longer(index = 'Species').sort(by=pl.all()) + >>> df.pivot_longer(index = 'Species').sort(by=pl.all()) shape: (8, 3) ┌───────────┬──────────────┬───────┐ │ Species ┆ variable ┆ value │ From 49fc6384fa1b2117ddc3baafade7102368d1b169 Mon Sep 17 00:00:00 2001 From: "samuel.oranyeli" Date: Fri, 21 Jun 2024 09:19:01 +1000 Subject: [PATCH 09/21] updates --- janitor/polars/pivot_longer.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/janitor/polars/pivot_longer.py b/janitor/polars/pivot_longer.py index c11b71005..b07964bbb 100644 --- a/janitor/polars/pivot_longer.py +++ b/janitor/polars/pivot_longer.py @@ -224,14 +224,14 @@ def pivot_longer( │ --- ┆ --- ┆ --- │ │ str ┆ str ┆ f64 │ ╞═══════════╪══════════════╪═══════╡ + │ setosa ┆ Petal.Length ┆ 1.4 │ + │ setosa ┆ Petal.Width ┆ 0.2 │ │ setosa ┆ Sepal.Length ┆ 5.1 │ - │ virginica ┆ Sepal.Length ┆ 5.9 │ │ setosa ┆ Sepal.Width ┆ 3.5 │ - │ virginica ┆ Sepal.Width ┆ 3.0 │ - │ setosa ┆ Petal.Length ┆ 1.4 │ │ virginica ┆ Petal.Length ┆ 5.1 │ - │ setosa ┆ Petal.Width ┆ 0.2 │ │ virginica ┆ Petal.Width ┆ 1.8 │ + │ virginica ┆ Sepal.Length ┆ 5.9 │ + │ virginica ┆ Sepal.Width ┆ 3.0 │ └───────────┴──────────────┴───────┘ Split the column labels into individual columns: From 610794816140a0d62073b63f09bd687eeb07bfaf Mon Sep 17 00:00:00 2001 From: "samuel.oranyeli" Date: Fri, 21 Jun 2024 09:26:12 +1000 Subject: [PATCH 10/21] fix docs --- janitor/polars/pivot_longer.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/janitor/polars/pivot_longer.py b/janitor/polars/pivot_longer.py index b07964bbb..e8fd7f572 100644 --- a/janitor/polars/pivot_longer.py +++ b/janitor/polars/pivot_longer.py @@ -246,14 +246,14 @@ def pivot_longer( │ --- ┆ --- ┆ --- ┆ --- │ │ str ┆ str ┆ str ┆ f64 │ ╞═══════════╪═══════╪═══════════╪═══════╡ + │ setosa ┆ Petal ┆ Length ┆ 1.4 │ + │ setosa ┆ Petal ┆ Width ┆ 0.2 │ │ setosa ┆ Sepal ┆ Length ┆ 5.1 │ - │ virginica ┆ Sepal ┆ Length ┆ 5.9 │ │ setosa ┆ Sepal ┆ Width ┆ 3.5 │ - │ virginica ┆ Sepal ┆ Width ┆ 3.0 │ - │ setosa ┆ Petal ┆ Length ┆ 1.4 │ │ virginica ┆ Petal ┆ Length ┆ 5.1 │ - │ setosa ┆ Petal ┆ Width ┆ 0.2 │ │ virginica ┆ Petal ┆ Width ┆ 1.8 │ + │ virginica ┆ Sepal ┆ Length ┆ 5.9 │ + │ virginica ┆ Sepal ┆ Width ┆ 3.0 │ └───────────┴───────┴───────────┴───────┘ Retain parts of the column names as headers: @@ -268,10 +268,10 @@ def pivot_longer( │ --- ┆ --- ┆ --- ┆ --- │ │ str ┆ str ┆ f64 ┆ f64 │ ╞═══════════╪═══════╪════════╪═══════╡ - │ setosa ┆ Sepal ┆ 5.1 ┆ 3.5 │ - │ virginica ┆ Sepal ┆ 5.9 ┆ 3.0 │ │ setosa ┆ Petal ┆ 1.4 ┆ 0.2 │ + │ setosa ┆ Sepal ┆ 5.1 ┆ 3.5 │ │ virginica ┆ Petal ┆ 5.1 ┆ 1.8 │ + │ virginica ┆ Sepal ┆ 5.9 ┆ 3.0 │ 
└───────────┴───────┴────────┴───────┘ Split the column labels based on regex: From f2b956b61733771e41adecbc040736521dd2cdc4 Mon Sep 17 00:00:00 2001 From: "samuel.oranyeli" Date: Fri, 21 Jun 2024 09:35:15 +1000 Subject: [PATCH 11/21] fix tests --- tests/polars/functions/test_pivot_longer_polars.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/polars/functions/test_pivot_longer_polars.py b/tests/polars/functions/test_pivot_longer_polars.py index 76a09efea..d2942c9fc 100644 --- a/tests/polars/functions/test_pivot_longer_polars.py +++ b/tests/polars/functions/test_pivot_longer_polars.py @@ -114,7 +114,7 @@ def test_names_pat_str(df_checks): and .value is present. """ result = ( - df_checks.janitor.pivot_longer( + df_checks.pivot_longer( index=["famid", "birth"], names_to=(".value", "age"), names_pattern="(.+)(.)", @@ -254,7 +254,7 @@ def test_names_pattern_dot_value(test_df): """Test output for names_pattern and .value.""" result = ( - test_df.janitor.pivot_longer( + test_df.pivot_longer( column_names=cs.all(), names_to=["set", ".value"], names_pattern="(.+)_(.+)", @@ -269,7 +269,7 @@ def test_names_sep_dot_value(test_df): """Test output for names_pattern and .value.""" result = ( - test_df.janitor.pivot_longer( + test_df.pivot_longer( column_names=cs.all(), names_to=["set", ".value"], names_sep="_", @@ -480,7 +480,7 @@ def test_names_pattern_single_column_not_dot_value1(single_val): """ result = ( single_val.select("x1") - .janitor.pivot_longer(names_to="yA", names_pattern="(.+)") + .pivot_longer(names_to="yA", names_pattern="(.+)") .select("yA", "value") ) From d849cff9dd74409b3b07218b899e8bd1af75fcff Mon Sep 17 00:00:00 2001 From: "samuel.oranyeli" Date: Sat, 22 Jun 2024 12:45:40 +1000 Subject: [PATCH 12/21] change sort logic for `complete` --- janitor/polars/complete.py | 35 +++++++++++++++++++++++++---------- 1 file changed, 25 insertions(+), 10 deletions(-) diff --git a/janitor/polars/complete.py b/janitor/polars/complete.py index ef098ede7..062fc3afd 100644 --- a/janitor/polars/complete.py +++ b/janitor/polars/complete.py @@ -110,7 +110,7 @@ def complete( >>> with pl.Config(tbl_rows=-1): ... df.complete( ... "group", - ... pl.struct("item_id", "item_name").unique().sort().alias("rar"), + ... pl.struct("item_id", "item_name").unique().alias("rar"), ... sort=True ... ) shape: (8, 5) @@ -133,7 +133,7 @@ def complete( >>> with pl.Config(tbl_rows=-1): ... df.complete( ... "group", - ... pl.struct("item_id", "item_name").unique().sort().alias('rar'), + ... pl.struct("item_id", "item_name").unique().alias('rar'), ... fill_value={"value1": 0, "value2": 99}, ... explicit=True, ... sort=True, @@ -159,7 +159,7 @@ def complete( >>> with pl.Config(tbl_rows=-1): ... df.complete( ... "group", - ... pl.struct("item_id", "item_name").unique().sort().alias('rar'), + ... pl.struct("item_id", "item_name").unique().alias('rar'), ... fill_value={"value1": 0, "value2": 99}, ... explicit=False, ... 
sort=True, @@ -343,13 +343,9 @@ def _complete( for column in columns: if isinstance(column, str): col = pl.col(column).unique() - if sort: - col = col.sort() _columns.append(col) elif cs.is_selector(column): col = column.as_expr().unique() - if sort: - col = col.sort() _columns.append(col) elif isinstance(column, pl.Expr): _columns.append(column) @@ -383,16 +379,35 @@ def _complete( for column in _columns: uniques = uniques.unnest(columns=column) + merge_columns = uniques.columns + if sort: + sort_index = "".join(uniques.columns + df.columns) + sort_index = f"{sort_index}_" + uniques = uniques.with_row_index(name=sort_index) + else: + sort_index = None no_columns_to_fill = set(df.columns) == set(uniques.columns) if fill_value is None or no_columns_to_fill: - return uniques.join(df, on=uniques.columns, how="full", coalesce=True) + if not sort: + return uniques.join( + df, on=merge_columns, how="full", coalesce=True + ) + return ( + uniques.join(df, on=merge_columns, how="full", coalesce=True) + .sort(by=sort_index) + .select(pl.exclude(sort_index)) + ) idx = None columns_to_select = df.columns if not explicit: - idx = "".join(df.columns) + idx = "".join(df.columns + uniques.columns) idx = f"{idx}_" df = df.with_row_index(name=idx) - df = uniques.join(df, on=uniques.columns, how="full", coalesce=True) + else: + idx = None + df = uniques.join(df, on=merge_columns, how="full", coalesce=True) + if sort: + df = df.sort(by=sort_index).select(pl.exclude(sort_index)) # exclude columns that were not used # to generate the combinations exclude_columns = uniques.columns From aee2b09d84a3fc334dc29916261d282576d5239a Mon Sep 17 00:00:00 2001 From: "samuel.oranyeli" Date: Sat, 22 Jun 2024 12:57:32 +1000 Subject: [PATCH 13/21] updates to complete --- janitor/polars/complete.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/janitor/polars/complete.py b/janitor/polars/complete.py index 062fc3afd..c3cff06e4 100644 --- a/janitor/polars/complete.py +++ b/janitor/polars/complete.py @@ -110,7 +110,7 @@ def complete( >>> with pl.Config(tbl_rows=-1): ... df.complete( ... "group", - ... pl.struct("item_id", "item_name").unique().alias("rar"), + ... pl.struct("item_id", "item_name").unique().sort().alias("rar"), ... sort=True ... ) shape: (8, 5) @@ -133,7 +133,7 @@ def complete( >>> with pl.Config(tbl_rows=-1): ... df.complete( ... "group", - ... pl.struct("item_id", "item_name").unique().alias('rar'), + ... pl.struct("item_id", "item_name").unique().sort().alias('rar'), ... fill_value={"value1": 0, "value2": 99}, ... explicit=True, ... sort=True, @@ -159,7 +159,7 @@ def complete( >>> with pl.Config(tbl_rows=-1): ... df.complete( ... "group", - ... pl.struct("item_id", "item_name").unique().alias('rar'), + ... pl.struct("item_id", "item_name").unique().sort().alias('rar'), ... fill_value={"value1": 0, "value2": 99}, ... explicit=False, ... sort=True, @@ -343,9 +343,13 @@ def _complete( for column in columns: if isinstance(column, str): col = pl.col(column).unique() + if sort: + col = col.sort() _columns.append(col) elif cs.is_selector(column): col = column.as_expr().unique() + if sort: + col = col.sort() _columns.append(col) elif isinstance(column, pl.Expr): _columns.append(column) @@ -354,7 +358,7 @@ def _complete( f"The argument passed to the columns parameter " "should either be a string, a column selector, " "or a polars expression, instead got - " - f"{type(column)}." + f"{type(column).__name__}." 
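A hedged sketch of the approach complete() itself relies on (hand-rolled, with hypothetical column names): build the unique combinations once, then join the original frame back so missing combinations surface as nulls.

    import polars as pl

    df = pl.DataFrame({"group": [1, 2], "item": ["a", "b"], "value": [10, 20]})

    # unique values per column, imploded to a single row, then exploded
    # pairwise into the full cartesian product
    uniques = (
        df.select(
            pl.col("group").unique().sort().implode(),
            pl.col("item").unique().sort().implode(),
        )
        .explode("group")
        .explode("item")
    )
    # (1, "b") and (2, "a") are absent from df, so their 'value' is null
    out = uniques.join(df, on=["group", "item"], how="full", coalesce=True)

fill_value then decides what those nulls become, and explicit controls whether pre-existing nulls are overwritten as well.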
) by_does_not_exist = by is None if by_does_not_exist: From 6a5f66e220a7a12bbe94fdea0d54930fc296d79a Mon Sep 17 00:00:00 2001 From: "samuel.oranyeli" Date: Sat, 22 Jun 2024 15:35:42 +1000 Subject: [PATCH 14/21] restore inital setup for complete --- janitor/polars/complete.py | 27 ++++----------------------- 1 file changed, 4 insertions(+), 23 deletions(-) diff --git a/janitor/polars/complete.py b/janitor/polars/complete.py index c3cff06e4..ef098ede7 100644 --- a/janitor/polars/complete.py +++ b/janitor/polars/complete.py @@ -358,7 +358,7 @@ def _complete( f"The argument passed to the columns parameter " "should either be a string, a column selector, " "or a polars expression, instead got - " - f"{type(column).__name__}." + f"{type(column)}." ) by_does_not_exist = by is None if by_does_not_exist: @@ -383,35 +383,16 @@ def _complete( for column in _columns: uniques = uniques.unnest(columns=column) - merge_columns = uniques.columns - if sort: - sort_index = "".join(uniques.columns + df.columns) - sort_index = f"{sort_index}_" - uniques = uniques.with_row_index(name=sort_index) - else: - sort_index = None no_columns_to_fill = set(df.columns) == set(uniques.columns) if fill_value is None or no_columns_to_fill: - if not sort: - return uniques.join( - df, on=merge_columns, how="full", coalesce=True - ) - return ( - uniques.join(df, on=merge_columns, how="full", coalesce=True) - .sort(by=sort_index) - .select(pl.exclude(sort_index)) - ) + return uniques.join(df, on=uniques.columns, how="full", coalesce=True) idx = None columns_to_select = df.columns if not explicit: - idx = "".join(df.columns + uniques.columns) + idx = "".join(df.columns) idx = f"{idx}_" df = df.with_row_index(name=idx) - else: - idx = None - df = uniques.join(df, on=merge_columns, how="full", coalesce=True) - if sort: - df = df.sort(by=sort_index).select(pl.exclude(sort_index)) + df = uniques.join(df, on=uniques.columns, how="full", coalesce=True) # exclude columns that were not used # to generate the combinations exclude_columns = uniques.columns From 8ea3f5622c58cf5d936a4709457675da7235a7b6 Mon Sep 17 00:00:00 2001 From: "samuel.oranyeli" Date: Sat, 22 Jun 2024 20:15:11 +1000 Subject: [PATCH 15/21] remove dead code --- janitor/polars/pivot_longer.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/janitor/polars/pivot_longer.py b/janitor/polars/pivot_longer.py index e8fd7f572..37f937627 100644 --- a/janitor/polars/pivot_longer.py +++ b/janitor/polars/pivot_longer.py @@ -538,8 +538,6 @@ def _pivot_longer_create_spec( spec = spec.with_columns( pl.col(variable_name).struct.rename_fields(names=names_to) ) - if ".value" not in names_to: - return spec.get_column(variable_name) not_dot_value = [name for name in names_to if name != ".value"] spec = spec.unnest(variable_name) if not_dot_value: From cf350a38a7cf62a387a462fd2effc49e47100101 Mon Sep 17 00:00:00 2001 From: "samuel.oranyeli" Date: Mon, 24 Jun 2024 07:13:01 +1000 Subject: [PATCH 16/21] use left join --- janitor/polars/complete.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/janitor/polars/complete.py b/janitor/polars/complete.py index ef098ede7..546f903bc 100644 --- a/janitor/polars/complete.py +++ b/janitor/polars/complete.py @@ -385,14 +385,14 @@ def _complete( no_columns_to_fill = set(df.columns) == set(uniques.columns) if fill_value is None or no_columns_to_fill: - return uniques.join(df, on=uniques.columns, how="full", coalesce=True) + return uniques.join(df, on=uniques.columns, how="left", coalesce=True) idx = None columns_to_select = 
df.columns if not explicit: idx = "".join(df.columns) idx = f"{idx}_" df = df.with_row_index(name=idx) - df = uniques.join(df, on=uniques.columns, how="full", coalesce=True) + df = uniques.join(df, on=uniques.columns, how="left", coalesce=True) # exclude columns that were not used # to generate the combinations exclude_columns = uniques.columns From 8fe093cb83901f25bb7b46e1246b9a55c61feba7 Mon Sep 17 00:00:00 2001 From: "samuel.oranyeli" Date: Wed, 26 Jun 2024 22:37:19 +1000 Subject: [PATCH 17/21] update docs for pivot_longer --- janitor/polars/pivot_longer.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/janitor/polars/pivot_longer.py b/janitor/polars/pivot_longer.py index 37f937627..9dea2581f 100644 --- a/janitor/polars/pivot_longer.py +++ b/janitor/polars/pivot_longer.py @@ -33,7 +33,7 @@ def pivot_longer_spec( becomes variables. It can come in handy for situations where - `janitor.polars.pivot_longer` + [`pivot_longer`][janitor.polars.pivot_longer.pivot_longer] seems inadequate for the transformation. !!! info "New in version 0.28.0" @@ -187,8 +187,11 @@ def pivot_longer( All measured variables are *unpivoted* (and typically duplicated) along the row axis. + If `names_pattern`, use a valid regular expression pattern containing at least + one capture group, compatible with the [regex crate](https://docs.rs/regex/latest/regex/). + For more granular control on the unpivoting, have a look at - `pivot_longer_spec`. + [`pivot_longer_spec`][janitor.polars.pivot_longer.pivot_longer_spec]. `pivot_longer` can also be applied to a LazyFrame. From 8dd1d82abefe500dfc7339e485586f957f1a0529 Mon Sep 17 00:00:00 2001 From: "samuel.oranyeli" Date: Thu, 27 Jun 2024 11:38:27 +1000 Subject: [PATCH 18/21] WIP - expand --- janitor/polars/__init__.py | 2 + janitor/polars/expand.py | 215 +++++++++++++++++++++++++++++++++++++ 2 files changed, 217 insertions(+) create mode 100644 janitor/polars/expand.py diff --git a/janitor/polars/__init__.py b/janitor/polars/__init__.py index 1485ad3f2..5ee31ef28 100644 --- a/janitor/polars/__init__.py +++ b/janitor/polars/__init__.py @@ -1,5 +1,6 @@ from .clean_names import clean_names, make_clean_names from .complete import complete +from .expand import expand from .pivot_longer import pivot_longer, pivot_longer_spec from .row_to_names import row_to_names @@ -10,4 +11,5 @@ "make_clean_names", "row_to_names", "complete", + "expand", ] diff --git a/janitor/polars/expand.py b/janitor/polars/expand.py new file mode 100644 index 000000000..10a0c8a49 --- /dev/null +++ b/janitor/polars/expand.py @@ -0,0 +1,215 @@ +"""expland implementation for polars.""" + +from __future__ import annotations + +from janitor.utils import check, import_message + +from .polars_flavor import register_dataframe_method, register_lazyframe_method + +try: + import polars as pl + import polars.selectors as cs + from polars.type_aliases import ColumnNameOrSelector +except ImportError: + import_message( + submodule="polars", + package="polars", + conda_channel="conda-forge", + pip_install=True, + ) + + +@register_lazyframe_method +@register_dataframe_method +def expand( + df: pl.DataFrame | pl.LazyFrame, + *columns: tuple[ColumnNameOrSelector], + by: ColumnNameOrSelector = None, + sort: bool = False, +) -> pl.DataFrame | pl.LazyFrame: + """ + Creates a DataFrame from a cartesian combination of all inputs. + + Inspiration is from tidyr's expand() function. + + If `by` is present, the DataFrame is *expanded* per group. + + `expand` can also be applied to a LazyFrame. + + !!! 
info "New in version 0.28.0" + + Examples: + >>> import pandas as pd + >>> import janitor + >>> data = [{'type': 'apple', 'year': 2010, 'size': 'XS'}, + ... {'type': 'orange', 'year': 2010, 'size': 'S'}, + ... {'type': 'apple', 'year': 2012, 'size': 'M'}, + ... {'type': 'orange', 'year': 2010, 'size': 'S'}, + ... {'type': 'orange', 'year': 2011, 'size': 'S'}, + ... {'type': 'orange', 'year': 2012, 'size': 'M'}] + >>> df = pd.DataFrame(data) + >>> df + type year size + 0 apple 2010 XS + 1 orange 2010 S + 2 apple 2012 M + 3 orange 2010 S + 4 orange 2011 S + 5 orange 2012 M + + Get unique observations: + >>> df.expand('type') + type + 0 apple + 1 orange + >>> df.expand('size') + size + 0 XS + 1 S + 2 M + >>> df.expand('type', 'size') + type size + 0 apple XS + 1 apple S + 2 apple M + 3 orange XS + 4 orange S + 5 orange M + >>> df.expand('type','size','year') + type size year + 0 apple XS 2010 + 1 apple XS 2012 + 2 apple XS 2011 + 3 apple S 2010 + 4 apple S 2012 + 5 apple S 2011 + 6 apple M 2010 + 7 apple M 2012 + 8 apple M 2011 + 9 orange XS 2010 + 10 orange XS 2012 + 11 orange XS 2011 + 12 orange S 2010 + 13 orange S 2012 + 14 orange S 2011 + 15 orange M 2010 + 16 orange M 2012 + 17 orange M 2011 + + Get observations that only occur in the data: + >>> df.expand(['type','size']) + type size + 0 apple XS + 1 orange S + 2 apple M + 3 orange M + >>> df.expand(['type','size','year']) + type size year + 0 apple XS 2010 + 1 orange S 2010 + 2 apple M 2012 + 3 orange S 2011 + 4 orange M 2012 + + Expand the DataFrame to include new observations: + >>> df.expand('type','size',{'new_year':range(2010,2014)}) + type size new_year + 0 apple XS 2010 + 1 apple XS 2011 + 2 apple XS 2012 + 3 apple XS 2013 + 4 apple S 2010 + 5 apple S 2011 + 6 apple S 2012 + 7 apple S 2013 + 8 apple M 2010 + 9 apple M 2011 + 10 apple M 2012 + 11 apple M 2013 + 12 orange XS 2010 + 13 orange XS 2011 + 14 orange XS 2012 + 15 orange XS 2013 + 16 orange S 2010 + 17 orange S 2011 + 18 orange S 2012 + 19 orange S 2013 + 20 orange M 2010 + 21 orange M 2011 + 22 orange M 2012 + 23 orange M 2013 + + Filter for missing observations: + >>> combo = df.expand('type','size','year') + >>> anti_join = df.merge(combo, how='right', indicator=True) + >>> anti_join.query("_merge=='right_only").drop(columns="_merge") + type year size + 1 apple 2012 XS + 2 apple 2011 XS + 3 apple 2010 S + 4 apple 2012 S + 5 apple 2011 S + 6 apple 2010 M + 8 apple 2011 M + 9 orange 2010 XS + 10 orange 2012 XS + 11 orange 2011 XS + 14 orange 2012 S + 16 orange 2010 M + 18 orange 2011 M + + Expand within each group, using `by`: + >>> df.expand('year','size',by='type') + year size + type + apple 2010 XS + apple 2010 M + apple 2012 XS + apple 2012 M + orange 2010 S + orange 2010 M + orange 2011 S + orange 2011 M + orange 2012 S + orange 2012 M + + Args: + df: A pandas DataFrame/LazyFrame. + columns: Specification of columns to expand. + by: If present, the DataFrame is expanded per group. + + Returns: + A polars DataFrame/LazyFrame. 
+ """ + if not columns: + return df + check("sort", sort, [bool]) + _columns = [] + for column in columns: + if isinstance(column, str): + col = pl.col(column) + if sort: + col = col.sort() + _columns.append(col.implode()) + elif cs.is_selector(column): + col = column.as_expr() + if sort: + col = col.sort() + _columns.append(col.implode()) + elif isinstance(column, (pl.Expr, pl.Series)): + _columns.append(column) + else: + raise TypeError( + f"The argument passed to the columns parameter " + "should either be a string, a column selector, " + "or a polars expression, instead got - " + f"{type(column)}." + ) + by_does_not_exist = by is None + if by_does_not_exist: + df = df.select(_columns) + else: + df = df.group_by(by, maintain_order=sort).agg(_columns) + for column in df.columns: + df = df.explode(column) + return df From 83296d1f4cf90c7295c3d434331c42dcfc8d9586 Mon Sep 17 00:00:00 2001 From: Samuel Oranyeli Date: Thu, 27 Jun 2024 12:40:22 +1000 Subject: [PATCH 19/21] Delete janitor/polars/expand.py --- janitor/polars/expand.py | 215 --------------------------------------- 1 file changed, 215 deletions(-) delete mode 100644 janitor/polars/expand.py diff --git a/janitor/polars/expand.py b/janitor/polars/expand.py deleted file mode 100644 index 10a0c8a49..000000000 --- a/janitor/polars/expand.py +++ /dev/null @@ -1,215 +0,0 @@ -"""expland implementation for polars.""" - -from __future__ import annotations - -from janitor.utils import check, import_message - -from .polars_flavor import register_dataframe_method, register_lazyframe_method - -try: - import polars as pl - import polars.selectors as cs - from polars.type_aliases import ColumnNameOrSelector -except ImportError: - import_message( - submodule="polars", - package="polars", - conda_channel="conda-forge", - pip_install=True, - ) - - -@register_lazyframe_method -@register_dataframe_method -def expand( - df: pl.DataFrame | pl.LazyFrame, - *columns: tuple[ColumnNameOrSelector], - by: ColumnNameOrSelector = None, - sort: bool = False, -) -> pl.DataFrame | pl.LazyFrame: - """ - Creates a DataFrame from a cartesian combination of all inputs. - - Inspiration is from tidyr's expand() function. - - If `by` is present, the DataFrame is *expanded* per group. - - `expand` can also be applied to a LazyFrame. - - !!! info "New in version 0.28.0" - - Examples: - >>> import pandas as pd - >>> import janitor - >>> data = [{'type': 'apple', 'year': 2010, 'size': 'XS'}, - ... {'type': 'orange', 'year': 2010, 'size': 'S'}, - ... {'type': 'apple', 'year': 2012, 'size': 'M'}, - ... {'type': 'orange', 'year': 2010, 'size': 'S'}, - ... {'type': 'orange', 'year': 2011, 'size': 'S'}, - ... 
{'type': 'orange', 'year': 2012, 'size': 'M'}] - >>> df = pd.DataFrame(data) - >>> df - type year size - 0 apple 2010 XS - 1 orange 2010 S - 2 apple 2012 M - 3 orange 2010 S - 4 orange 2011 S - 5 orange 2012 M - - Get unique observations: - >>> df.expand('type') - type - 0 apple - 1 orange - >>> df.expand('size') - size - 0 XS - 1 S - 2 M - >>> df.expand('type', 'size') - type size - 0 apple XS - 1 apple S - 2 apple M - 3 orange XS - 4 orange S - 5 orange M - >>> df.expand('type','size','year') - type size year - 0 apple XS 2010 - 1 apple XS 2012 - 2 apple XS 2011 - 3 apple S 2010 - 4 apple S 2012 - 5 apple S 2011 - 6 apple M 2010 - 7 apple M 2012 - 8 apple M 2011 - 9 orange XS 2010 - 10 orange XS 2012 - 11 orange XS 2011 - 12 orange S 2010 - 13 orange S 2012 - 14 orange S 2011 - 15 orange M 2010 - 16 orange M 2012 - 17 orange M 2011 - - Get observations that only occur in the data: - >>> df.expand(['type','size']) - type size - 0 apple XS - 1 orange S - 2 apple M - 3 orange M - >>> df.expand(['type','size','year']) - type size year - 0 apple XS 2010 - 1 orange S 2010 - 2 apple M 2012 - 3 orange S 2011 - 4 orange M 2012 - - Expand the DataFrame to include new observations: - >>> df.expand('type','size',{'new_year':range(2010,2014)}) - type size new_year - 0 apple XS 2010 - 1 apple XS 2011 - 2 apple XS 2012 - 3 apple XS 2013 - 4 apple S 2010 - 5 apple S 2011 - 6 apple S 2012 - 7 apple S 2013 - 8 apple M 2010 - 9 apple M 2011 - 10 apple M 2012 - 11 apple M 2013 - 12 orange XS 2010 - 13 orange XS 2011 - 14 orange XS 2012 - 15 orange XS 2013 - 16 orange S 2010 - 17 orange S 2011 - 18 orange S 2012 - 19 orange S 2013 - 20 orange M 2010 - 21 orange M 2011 - 22 orange M 2012 - 23 orange M 2013 - - Filter for missing observations: - >>> combo = df.expand('type','size','year') - >>> anti_join = df.merge(combo, how='right', indicator=True) - >>> anti_join.query("_merge=='right_only").drop(columns="_merge") - type year size - 1 apple 2012 XS - 2 apple 2011 XS - 3 apple 2010 S - 4 apple 2012 S - 5 apple 2011 S - 6 apple 2010 M - 8 apple 2011 M - 9 orange 2010 XS - 10 orange 2012 XS - 11 orange 2011 XS - 14 orange 2012 S - 16 orange 2010 M - 18 orange 2011 M - - Expand within each group, using `by`: - >>> df.expand('year','size',by='type') - year size - type - apple 2010 XS - apple 2010 M - apple 2012 XS - apple 2012 M - orange 2010 S - orange 2010 M - orange 2011 S - orange 2011 M - orange 2012 S - orange 2012 M - - Args: - df: A pandas DataFrame/LazyFrame. - columns: Specification of columns to expand. - by: If present, the DataFrame is expanded per group. - - Returns: - A polars DataFrame/LazyFrame. - """ - if not columns: - return df - check("sort", sort, [bool]) - _columns = [] - for column in columns: - if isinstance(column, str): - col = pl.col(column) - if sort: - col = col.sort() - _columns.append(col.implode()) - elif cs.is_selector(column): - col = column.as_expr() - if sort: - col = col.sort() - _columns.append(col.implode()) - elif isinstance(column, (pl.Expr, pl.Series)): - _columns.append(column) - else: - raise TypeError( - f"The argument passed to the columns parameter " - "should either be a string, a column selector, " - "or a polars expression, instead got - " - f"{type(column)}." 
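For the record, a sketch of the per-group behaviour the deleted code aimed at (hand-rolled, hypothetical data): group on `by`, aggregate every other column to its unique values, then explode the lists to get a cartesian product within each group.

    import polars as pl

    df = pl.DataFrame(
        {
            "type": ["apple", "apple", "orange"],
            "year": [2010, 2012, 2010],
            "size": ["XS", "M", "S"],
        }
    )
    out = (
        df.group_by("type", maintain_order=True)
        .agg(pl.col("year").unique().sort(), pl.col("size").unique().sort())
        .explode("year")
        .explode("size")
    )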
- ) - by_does_not_exist = by is None - if by_does_not_exist: - df = df.select(_columns) - else: - df = df.group_by(by, maintain_order=sort).agg(_columns) - for column in df.columns: - df = df.explode(column) - return df From f1fab2eafebfa572c3d2f77509e743ef7af31a42 Mon Sep 17 00:00:00 2001 From: "samuel.oranyeli" Date: Thu, 27 Jun 2024 12:48:26 +1000 Subject: [PATCH 20/21] remove expand --- janitor/polars/__init__.py | 2 -- pyproject.toml | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/janitor/polars/__init__.py b/janitor/polars/__init__.py index 5ee31ef28..1485ad3f2 100644 --- a/janitor/polars/__init__.py +++ b/janitor/polars/__init__.py @@ -1,6 +1,5 @@ from .clean_names import clean_names, make_clean_names from .complete import complete -from .expand import expand from .pivot_longer import pivot_longer, pivot_longer_spec from .row_to_names import row_to_names @@ -11,5 +10,4 @@ "make_clean_names", "row_to_names", "complete", - "expand", ] diff --git a/pyproject.toml b/pyproject.toml index 0a697589f..d80aaa4fa 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -20,7 +20,7 @@ target-version = ['py36', 'py37', 'py38'] [tool.interrogate] exclude = ["setup.py", "docs", "nbconvert_config.py"] -fail-under = 55 +fail-under = 0 ignore-init-method = true ignore-init-module = true ignore-module = false From 2b986145fc6a8e17258ba0ed509a92647f1cb7d6 Mon Sep 17 00:00:00 2001 From: "samuel.oranyeli" Date: Thu, 27 Jun 2024 12:48:32 +1000 Subject: [PATCH 21/21] remove expand --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index d80aaa4fa..0a697589f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -20,7 +20,7 @@ target-version = ['py36', 'py37', 'py38'] [tool.interrogate] exclude = ["setup.py", "docs", "nbconvert_config.py"] -fail-under = 0 +fail-under = 55 ignore-init-method = true ignore-init-module = true ignore-module = false
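As an end-to-end check of the behaviour this series settles on (a sketch, assuming janitor.polars registers pivot_longer on polars DataFrames as the docstrings above indicate):

    import polars as pl
    import janitor.polars  # noqa: F401 -- assumed to register the methods

    df = pl.DataFrame(
        {
            "Species": ["setosa", "virginica"],
            "Sepal.Length": [5.1, 5.9],
            "Sepal.Width": [3.5, 3.0],
        }
    )
    out = (
        df.pivot_longer(
            index="Species",
            names_to=("part", ".value"),
            names_sep=".",
        )
        # row order is not guaranteed, hence the sort, mirroring the doctests
        .sort(by=pl.all())
    )
    # columns: Species, part, Length, Width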