From 0285ae83d245406eb72754926e7f79278416fd9b Mon Sep 17 00:00:00 2001 From: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Wed, 13 Nov 2024 09:08:00 +0000 Subject: [PATCH 1/2] perf: concatenate less in pandas-like with_columns --- narwhals/_pandas_like/dataframe.py | 36 +++++++++++++++++------------- 1 file changed, 20 insertions(+), 16 deletions(-) diff --git a/narwhals/_pandas_like/dataframe.py b/narwhals/_pandas_like/dataframe.py index 3bf60f845..1893bb14e 100644 --- a/narwhals/_pandas_like/dataframe.py +++ b/narwhals/_pandas_like/dataframe.py @@ -422,27 +422,31 @@ def with_columns( if fast_path: new_column_name_to_new_column_map = {s.name: s for s in new_columns} - to_concat = [] - # Make sure to preserve column order - for name in self._native_frame.columns: - if name in new_column_name_to_new_column_map: - to_concat.append( - validate_dataframe_comparand( - index, new_column_name_to_new_column_map.pop(name) - ) - ) - else: - to_concat.append(self._native_frame[name]) - to_concat.extend( - validate_dataframe_comparand(index, new_column_name_to_new_column_map[s]) - for s in new_column_name_to_new_column_map + columns_that_stay_the_same = list( + set(self.columns).difference(new_column_name_to_new_column_map) ) - + frame = select_columns_by_name( + self._native_frame, + columns_that_stay_the_same, + self._backend_version, + self._implementation, + ) + other = [ + validate_dataframe_comparand(index, value).rename(key, copy=False) + for key, value in new_column_name_to_new_column_map.items() + if key not in frame.columns + ] df = horizontal_concat( - to_concat, + [frame, *other], implementation=self._implementation, backend_version=self._backend_version, ) + order = self.columns + [ + x for x in new_column_name_to_new_column_map if x not in self.columns + ] + df = select_columns_by_name( + df, order, self._backend_version, self._implementation + ) else: # This is the logic in pandas' DataFrame.assign if self._backend_version < (2,): From fd5aca432fc00bed7b301edaabf0e2ec9b0927ab Mon Sep 17 00:00:00 2001 From: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Wed, 13 Nov 2024 09:13:44 +0000 Subject: [PATCH 2/2] simplify --- narwhals/_pandas_like/dataframe.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/narwhals/_pandas_like/dataframe.py b/narwhals/_pandas_like/dataframe.py index 1893bb14e..d28dfb416 100644 --- a/narwhals/_pandas_like/dataframe.py +++ b/narwhals/_pandas_like/dataframe.py @@ -432,7 +432,7 @@ def with_columns( self._implementation, ) other = [ - validate_dataframe_comparand(index, value).rename(key, copy=False) + validate_dataframe_comparand(index, value) for key, value in new_column_name_to_new_column_map.items() if key not in frame.columns ]