From 8c48c3947ea271c232dd0bbb49409e0d09cb82a9 Mon Sep 17 00:00:00 2001 From: Emil Ejbyfeldt Date: Wed, 30 Oct 2024 13:20:41 +0100 Subject: [PATCH 01/32] Update test case for issue #5771 showing it is resolved (#13180) With #13066 we run optimizer rules on subquries by default which resolved issue #5771 --- datafusion/core/tests/dataframe/mod.rs | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/datafusion/core/tests/dataframe/mod.rs b/datafusion/core/tests/dataframe/mod.rs index 0c3c2a99517e..439aa6147e9b 100644 --- a/datafusion/core/tests/dataframe/mod.rs +++ b/datafusion/core/tests/dataframe/mod.rs @@ -114,10 +114,7 @@ async fn test_count_wildcard_on_where_in() -> Result<()> { .await? .aggregate(vec![], vec![count(wildcard())])? .select(vec![count(wildcard())])? - .into_unoptimized_plan(), - // Usually, into_optimized_plan() should be used here, but due to - // https://github.com/apache/datafusion/issues/5771, - // subqueries in SQL cannot be optimized, resulting in differences in logical_plan. Therefore, into_unoptimized_plan() is temporarily used here. + .into_optimized_plan()?, ), ))? .select(vec![col("a"), col("b")])? From 903a098365df120c935eef0bd57b85cd09411ab4 Mon Sep 17 00:00:00 2001 From: Piotr Findeisen Date: Wed, 30 Oct 2024 17:31:37 +0100 Subject: [PATCH 02/32] Test LIKE with dynamic pattern (#13141) * Add more string test data * Test LIKE with dynamic pattern LIKE pattern doesn't have to be static. Add test cases with dynamic LIKE to prevent potential regressions. --- .../sqllogictest/test_files/aggregate.slt | 2 +- datafusion/sqllogictest/test_files/ddl.slt | 2 +- .../test_files/string/dictionary_utf8.slt | 2 + .../test_files/string/init_data.slt.part | 2 + .../test_files/string/large_string.slt | 4 + .../sqllogictest/test_files/string/string.slt | 102 +++++++++++++++ .../test_files/string/string_query.slt.part | 122 +++++++++++++++++- 7 files changed, 230 insertions(+), 6 deletions(-) diff --git a/datafusion/sqllogictest/test_files/aggregate.slt b/datafusion/sqllogictest/test_files/aggregate.slt index f03c3700ab9f..aac315ea0efd 100644 --- a/datafusion/sqllogictest/test_files/aggregate.slt +++ b/datafusion/sqllogictest/test_files/aggregate.slt @@ -6080,7 +6080,7 @@ ORDER BY k; statement ok CREATE TABLE t1(v1 int); -# issue: https://github.com/apache/datafusion/issues/12814 +# issue: https://github.com/apache/datafusion/issues/12814 statement error DataFusion error: Error during planning: Aggregate functions are not allowed in the WHERE clause. Consider using HAVING instead SELECT v1 FROM t1 WHERE ((count(v1) % 1) << 1) > 0; diff --git a/datafusion/sqllogictest/test_files/ddl.slt b/datafusion/sqllogictest/test_files/ddl.slt index 3205920d7110..4a0ba87bfa1a 100644 --- a/datafusion/sqllogictest/test_files/ddl.slt +++ b/datafusion/sqllogictest/test_files/ddl.slt @@ -804,4 +804,4 @@ query error DataFusion error: Schema error: No field named a\. EXPLAIN CREATE TABLE t(a int) AS VALUES (a + a); statement error DataFusion error: Schema error: No field named a\. 
-CREATE TABLE t(a int) AS SELECT x FROM (VALUES (a)) t(x) WHERE false; \ No newline at end of file +CREATE TABLE t(a int) AS SELECT x FROM (VALUES (a)) t(x) WHERE false; diff --git a/datafusion/sqllogictest/test_files/string/dictionary_utf8.slt b/datafusion/sqllogictest/test_files/string/dictionary_utf8.slt index c181f613ee9a..04bd00e316e8 100644 --- a/datafusion/sqllogictest/test_files/string/dictionary_utf8.slt +++ b/datafusion/sqllogictest/test_files/string/dictionary_utf8.slt @@ -51,6 +51,8 @@ from test_basic_operator; Andrew datafusion📊🔥 true false true false Xiangpeng datafusion数据融合 false true false true Raphael datafusionДатаФусион false false false false +under_score un iść core false false false false +percent pan Tadeusz ma iść w kąt false false false false NULL NULL NULL NULL NULL NULL # diff --git a/datafusion/sqllogictest/test_files/string/init_data.slt.part b/datafusion/sqllogictest/test_files/string/init_data.slt.part index 096e3bb3b330..18cd022f7882 100644 --- a/datafusion/sqllogictest/test_files/string/init_data.slt.part +++ b/datafusion/sqllogictest/test_files/string/init_data.slt.part @@ -20,6 +20,8 @@ create table test_source as values ('Andrew', 'X', 'datafusion📊🔥', '🔥'), ('Xiangpeng', 'Xiangpeng', 'datafusion数据融合', 'datafusion数据融合'), ('Raphael', 'R', 'datafusionДатаФусион', 'аФус'), + ('under_score', 'un_____core', 'un iść core', 'chrząszcz na łące w 東京都'), + ('percent', 'p%t', 'pan Tadeusz ma iść w kąt', 'Pan Tadeusz ma frunąć stąd w kąt'), (NULL, 'R', NULL, '🔥'); # -------------------------------------- diff --git a/datafusion/sqllogictest/test_files/string/large_string.slt b/datafusion/sqllogictest/test_files/string/large_string.slt index 8d8a5711bdb8..2063eae0f8ed 100644 --- a/datafusion/sqllogictest/test_files/string/large_string.slt +++ b/datafusion/sqllogictest/test_files/string/large_string.slt @@ -41,6 +41,8 @@ SELECT ascii_1, ascii_2, unicode_1, unicode_2 FROM test_basic_operator Andrew X datafusion📊🔥 🔥 Xiangpeng Xiangpeng datafusion数据融合 datafusion数据融合 Raphael R datafusionДатаФусион аФус +under_score un_____core un iść core chrząszcz na łące w 東京都 +percent p%t pan Tadeusz ma iść w kąt Pan Tadeusz ma frunąć stąd w kąt NULL R NULL 🔥 # TODO: move it back to `string_query.slt.part` after fixing the issue @@ -57,6 +59,8 @@ from test_basic_operator; Andrew datafusion📊🔥 true false true false Xiangpeng datafusion数据融合 false true false true Raphael datafusionДатаФусион false false false false +under_score un iść core false false false false +percent pan Tadeusz ma iść w kąt false false false false NULL NULL NULL NULL NULL NULL # diff --git a/datafusion/sqllogictest/test_files/string/string.slt b/datafusion/sqllogictest/test_files/string/string.slt index e84342abd3df..62b0cae5f665 100644 --- a/datafusion/sqllogictest/test_files/string/string.slt +++ b/datafusion/sqllogictest/test_files/string/string.slt @@ -48,6 +48,8 @@ from test_basic_operator; Andrew datafusion📊🔥 true false true false Xiangpeng datafusion数据融合 false true false true Raphael datafusionДатаФусион false false false false +under_score un iść core false false false false +percent pan Tadeusz ma iść w kąt false false false false NULL NULL NULL NULL NULL NULL # @@ -55,6 +57,106 @@ NULL NULL NULL NULL NULL NULL # include ./string_query.slt.part +# TODO support all String types in sql_like_to_expr and move this test to `string_query.slt.part` +# dynamic LIKE as filter +query TTT rowsort +SELECT ascii_1, 'is LIKE', ascii_2 FROM test_basic_operator WHERE ascii_1 LIKE ascii_2 +UNION ALL +SELECT ascii_1, 'is NOT 
LIKE', ascii_2 FROM test_basic_operator WHERE ascii_1 NOT LIKE ascii_2 +UNION ALL +SELECT unicode_1, 'is LIKE', ascii_2 FROM test_basic_operator WHERE unicode_1 LIKE ascii_2 +UNION ALL +SELECT unicode_1, 'is NOT LIKE', ascii_2 FROM test_basic_operator WHERE unicode_1 NOT LIKE ascii_2 +UNION ALL +SELECT unicode_2, 'is LIKE', ascii_2 FROM test_basic_operator WHERE unicode_2 LIKE ascii_2 +UNION ALL +SELECT unicode_2, 'is NOT LIKE', ascii_2 FROM test_basic_operator WHERE unicode_2 NOT LIKE ascii_2 +---- +Andrew is NOT LIKE X +Pan Tadeusz ma frunąć stąd w kąt is NOT LIKE p%t +Raphael is NOT LIKE R +Xiangpeng is LIKE Xiangpeng +chrząszcz na łące w 東京都 is NOT LIKE un_____core +datafusionДатаФусион is NOT LIKE R +datafusion数据融合 is NOT LIKE Xiangpeng +datafusion数据融合 is NOT LIKE Xiangpeng +datafusion📊🔥 is NOT LIKE X +pan Tadeusz ma iść w kąt is LIKE p%t +percent is LIKE p%t +un iść core is LIKE un_____core +under_score is LIKE un_____core +аФус is NOT LIKE R +🔥 is NOT LIKE R +🔥 is NOT LIKE X + +# TODO support all String types in sql_like_to_expr and move this test to `string_query.slt.part` +# dynamic LIKE as projection +query TTTTBBBB rowsort +SELECT + ascii_1, ascii_2, unicode_1, unicode_2, + (ascii_1 LIKE ascii_2) AS ascii_1_like_ascii_2, + (ascii_2 LIKE ascii_1) AS ascii_2_like_ascii_1, + (unicode_1 LIKE ascii_2) AS unicode_1_like_ascii_2, + (unicode_2 LIKE ascii_2) AS unicode_2_like_ascii_2 +FROM test_basic_operator +---- +Andrew X datafusion📊🔥 🔥 false false false false +NULL R NULL 🔥 NULL NULL NULL false +Raphael R datafusionДатаФусион аФус false false false false +Xiangpeng Xiangpeng datafusion数据融合 datafusion数据融合 true true false false +percent p%t pan Tadeusz ma iść w kąt Pan Tadeusz ma frunąć stąd w kąt true false true false +under_score un_____core un iść core chrząszcz na łące w 東京都 true false true false + +# TODO support all String types in sql_like_to_expr and move this test to `string_query.slt.part` +# dynamic ILIKE as filter +query TTT rowsort +SELECT ascii_1, 'is ILIKE', ascii_2 FROM test_basic_operator WHERE ascii_1 ILIKE ascii_2 +UNION ALL +SELECT ascii_1, 'is NOT ILIKE', ascii_2 FROM test_basic_operator WHERE ascii_1 NOT ILIKE ascii_2 +UNION ALL +SELECT unicode_1, 'is ILIKE', ascii_2 FROM test_basic_operator WHERE unicode_1 ILIKE ascii_2 +UNION ALL +SELECT unicode_1, 'is NOT ILIKE', ascii_2 FROM test_basic_operator WHERE unicode_1 NOT ILIKE ascii_2 +UNION ALL +SELECT unicode_2, 'is ILIKE', ascii_2 FROM test_basic_operator WHERE unicode_2 ILIKE ascii_2 +UNION ALL +SELECT unicode_2, 'is NOT ILIKE', ascii_2 FROM test_basic_operator WHERE unicode_2 NOT ILIKE ascii_2 +---- +Andrew is NOT ILIKE X +Pan Tadeusz ma frunąć stąd w kąt is ILIKE p%t +Raphael is NOT ILIKE R +Xiangpeng is ILIKE Xiangpeng +chrząszcz na łące w 東京都 is NOT ILIKE un_____core +datafusionДатаФусион is NOT ILIKE R +datafusion数据融合 is NOT ILIKE Xiangpeng +datafusion数据融合 is NOT ILIKE Xiangpeng +datafusion📊🔥 is NOT ILIKE X +pan Tadeusz ma iść w kąt is ILIKE p%t +percent is ILIKE p%t +un iść core is ILIKE un_____core +under_score is ILIKE un_____core +аФус is NOT ILIKE R +🔥 is NOT ILIKE R +🔥 is NOT ILIKE X + +# TODO support all String types in sql_like_to_expr and move this test to `string_query.slt.part` +# dynamic ILIKE as projection +query TTTTBBBB rowsort +SELECT + ascii_1, ascii_2, unicode_1, unicode_2, + (ascii_1 ILIKE ascii_2) AS ascii_1_ilike_ascii_2, + (ascii_2 ILIKE ascii_1) AS ascii_2_ilike_ascii_1, + (unicode_1 ILIKE ascii_2) AS unicode_1_ilike_ascii_2, + (unicode_2 ILIKE ascii_2) AS unicode_2_ilike_ascii_2 +FROM 
test_basic_operator +---- +Andrew X datafusion📊🔥 🔥 false false false false +NULL R NULL 🔥 NULL NULL NULL false +Raphael R datafusionДатаФусион аФус false false false false +Xiangpeng Xiangpeng datafusion数据融合 datafusion数据融合 true true false false +percent p%t pan Tadeusz ma iść w kąt Pan Tadeusz ma frunąć stąd w kąt true false true true +under_score un_____core un iść core chrząszcz na łące w 東京都 true false true false + # # Clean up # diff --git a/datafusion/sqllogictest/test_files/string/string_query.slt.part b/datafusion/sqllogictest/test_files/string/string_query.slt.part index dc5626b7d573..24ac379ca5a0 100644 --- a/datafusion/sqllogictest/test_files/string/string_query.slt.part +++ b/datafusion/sqllogictest/test_files/string/string_query.slt.part @@ -26,6 +26,8 @@ SELECT ascii_1, ascii_2, unicode_1, unicode_2 FROM test_basic_operator Andrew X datafusion📊🔥 🔥 Xiangpeng Xiangpeng datafusion数据融合 datafusion数据融合 Raphael R datafusionДатаФусион аФус +under_score un_____core un iść core chrząszcz na łące w 東京都 +percent p%t pan Tadeusz ma iść w kąt Pan Tadeusz ma frunąć stąd w kąt NULL R NULL 🔥 # -------------------------------------- @@ -42,6 +44,8 @@ select ascii_1, ascii_2 from test_basic_operator where ascii_1 <> ascii_2 ---- Andrew X Raphael R +under_score un_____core +percent p%t query TT select unicode_1, unicode_2 from test_basic_operator where unicode_1 = unicode_2 @@ -53,6 +57,8 @@ select unicode_1, unicode_2 from test_basic_operator where unicode_1 <> unicode_ ---- datafusion📊🔥 🔥 datafusionДатаФусион аФус +un iść core chrząszcz na łące w 東京都 +pan Tadeusz ma iść w kąt Pan Tadeusz ma frunąć stąd w kąt query TT select ascii_1, unicode_1 from test_basic_operator where ascii_1 = unicode_1 @@ -64,6 +70,8 @@ select ascii_1, unicode_1 from test_basic_operator where ascii_1 <> unicode_1 Andrew datafusion📊🔥 Xiangpeng datafusion数据融合 Raphael datafusionДатаФусион +under_score un iść core +percent pan Tadeusz ma iść w kąt # -------------------------------------- # column comparison @@ -82,6 +90,8 @@ from test_basic_operator; Andrew X datafusion📊🔥 🔥 false true false true false true Xiangpeng Xiangpeng datafusion数据融合 datafusion数据融合 true false true false false true Raphael R datafusionДатаФусион аФус false true false true false true +under_score un_____core un iść core chrząszcz na łące w 東京都 false true false true false true +percent p%t pan Tadeusz ma iść w kąt Pan Tadeusz ma frunąć stąd w kąt false true false true false true NULL R NULL 🔥 NULL NULL NULL NULL NULL NULL # -------------------------------------- @@ -99,6 +109,8 @@ from test_basic_operator; Andrew datafusion📊🔥 true false false true Xiangpeng datafusion数据融合 false true true false Raphael datafusionДатаФусион false true false true +under_score un iść core false true false true +percent pan Tadeusz ma iść w kąt false true false true NULL NULL NULL NULL NULL NULL # -------------------------------------- @@ -116,6 +128,8 @@ from test_basic_operator; Andrew datafusion📊🔥 true false false true Xiangpeng datafusion数据融合 false true true false Raphael datafusionДатаФусион false true false true +under_score un iść core false true false true +percent pan Tadeusz ma iść w kąt false true false true NULL NULL NULL NULL NULL NULL # -------------------------------------- @@ -133,6 +147,8 @@ from test_basic_operator; Andrew datafusion📊🔥 true false false true Xiangpeng datafusion数据融合 false true true false Raphael datafusionДатаФусион false true false true +under_score un iść core false true false true +percent pan Tadeusz ma iść w kąt false true false true 
NULL NULL NULL NULL NULL NULL # -------------------------------------- @@ -175,6 +191,8 @@ FROM test_basic_operator; And X dat 🔥 Xia Xia dat dat Rap R dat аФу +und un_ un chr +per p%t pan Pan NULL R NULL 🔥 # -------------------------------------- @@ -187,7 +205,7 @@ SELECT FROM test_basic_operator ---- -3 3 +5 5 query II SELECT @@ -200,6 +218,8 @@ GROUP BY ascii_2; 1 1 1 1 1 1 +1 1 +1 1 query II SELECT @@ -212,6 +232,8 @@ GROUP BY unicode_2; 1 1 1 1 1 1 +1 1 +1 1 # -------------------------------------- # STARTS_WITH function @@ -228,6 +250,8 @@ FROM test_basic_operator false false false false true true false false true false false false +false false false false +false false false false NULL NULL NULL NULL query BBBB @@ -241,6 +265,8 @@ FROM test_basic_operator true false true false false false true true false false true false +false false false false +false false false false NULL false NULL false # -------------------------------------- @@ -255,6 +281,8 @@ FROM test_basic_operator; Andrew Xiangpeng Raphael +under_scrre +percent NULL query T @@ -265,6 +293,8 @@ FROM test_basic_operator; databusirn📊🔥 databusirn数据融合 databusirnДатаФусион +un iść crre +pan Tadeusz ma iść w kąt NULL # -------------------------------------- @@ -280,6 +310,8 @@ FROM test_basic_operator; Andrfw Xiangpfng Raphafl +undfr_score +pfrcent NULL # Should run REGEXP_REPLACE with Scalar value for string with flag @@ -291,6 +323,8 @@ FROM test_basic_operator; Andrfw Xiangpfng Raphafl +undfr_score +pfrcent NULL # Should run REGEXP_REPLACE with ScalarArray value for string @@ -302,6 +336,8 @@ FROM test_basic_operator; Andrew Xiangpeng Raphael +bar +bar NULL # Should run REGEXP_REPLACE with ScalarArray value for string with flag @@ -313,6 +349,8 @@ FROM test_basic_operator; Andrew Xiangpeng Raphael +bar +bar NULL # -------------------------------------- @@ -333,6 +371,8 @@ FROM test_lowercase; Andrew Datafusion📊🔥 Xiangpeng Datafusion数据融合 Raphael Datafusionдатафусион +Under_Score Un Iść Core +Percent Pan Tadeusz Ma Iść W KąT NULL NULL statement ok @@ -353,6 +393,8 @@ FROM test_basic_operator; 65 88 100 128293 88 88 100 100 82 82 100 1072 +117 117 117 99 +112 112 112 80 NULL 82 NULL 128293 # -------------------------------------- @@ -373,6 +415,8 @@ FROM test_basic_operator; Andrew ndrew NULL datafusion📊🔥 datafusion📊 NULL Xiangpeng Xiangpeng NULL datafusion数据融合 datafusion数据融合 NULL Raphael Raphael NULL datafusionДатаФусион datafusionДатаФусион NULL +under_score under_score NULL un iść core un iść core NULL +percent percent NULL pan Tadeusz ma iść w kąt pan Tadeusz ma iść w kąt NULL NULL NULL NULL NULL NULL NULL # -------------------------------------- @@ -393,6 +437,8 @@ FROM test_basic_operator; Andrew Andrew NULL datafusion📊🔥 NULL datafusion📊🔥 Xiangpeng (empty) NULL datafusion数据融合 NULL datafusion数据融合 Raphael aphael NULL datafusionДатаФусион NULL datafusionДатаФусион +under_score der_score NULL un iść core NULL un iść core +percent ercent NULL pan Tadeusz ma iść w kąt NULL pan Tadeusz ma iść w kąt NULL NULL NULL NULL NULL NULL # -------------------------------------- @@ -412,6 +458,8 @@ FROM test_basic_operator; And Andrew Andrew NULL datafusion📊 Xiangpeng (empty) Xiangpeng NULL datafusion数据融合 Raphael Raphael Raphael NULL datafusionДатаФусион +under_sco under_s under_score NULL un iść core +percent percen percent NULL pan Tadeusz ma iść w kąt NULL NULL NULL NULL NULL # -------------------------------------- @@ -431,6 +479,8 @@ FROM test_basic_operator; false false NULL true NULL true false true NULL true NULL false false true 
NULL true NULL false +false false NULL false NULL false +false false NULL false NULL false NULL NULL NULL NULL NULL NULL # -------------------------------------- @@ -443,6 +493,8 @@ SELECT LOWER(ascii_1) as c1, LOWER(unicode_1) as c2 FROM test_basic_operator; andrew datafusion📊🔥 xiangpeng datafusion数据融合 raphael datafusionдатафусион +under_score un iść core +percent pan tadeusz ma iść w kąt NULL NULL # -------------------------------------- @@ -455,6 +507,8 @@ SELECT UPPER(ascii_1) as c1, UPPER(unicode_1) as c2 FROM test_basic_operator; ANDREW DATAFUSION📊🔥 XIANGPENG DATAFUSION数据融合 RAPHAEL DATAFUSIONДАТАФУСИОН +UNDER_SCORE UN IŚĆ CORE +PERCENT PAN TADEUSZ MA IŚĆ W KĄT NULL NULL # -------------------------------------- @@ -480,6 +534,8 @@ FROM test_basic_operator; Andrew:Data AndrewX Andrew Andrewdatafusion📊🔥 Andrew🔥 datafusion📊🔥Andrew datafusion📊🔥🔥 datafusion📊🔥 datafusion📊🔥🔥 🔥 (empty) Andrew,datafusion📊🔥 Xiangpeng:Data XiangpengXiangpeng Xiangpeng Xiangpengdatafusion数据融合 Xiangpengdatafusion数据融合 datafusion数据融合Xiangpeng datafusion数据融合datafusion数据融合 datafusion数据融合 datafusion数据融合🔥 🔥 (empty) Xiangpeng,datafusion数据融合 Raphael:Data RaphaelR Raphael RaphaeldatafusionДатаФусион RaphaelаФус datafusionДатаФусионRaphael datafusionДатаФусионаФус datafusionДатаФусион datafusionДатаФусион🔥 🔥 (empty) Raphael,datafusionДатаФусион +under_score:Data under_scoreun_____core under_score under_scoreun iść core under_scorechrząszcz na łące w 東京都 un iść coreunder_score un iść corechrząszcz na łące w 東京都 un iść core un iść core🔥 🔥 (empty) under_score,un iść core +percent:Data percentp%t percent percentpan Tadeusz ma iść w kąt percentPan Tadeusz ma frunąć stąd w kąt pan Tadeusz ma iść w kątpercent pan Tadeusz ma iść w kątPan Tadeusz ma frunąć stąd w kąt pan Tadeusz ma iść w kąt pan Tadeusz ma iść w kąt🔥 🔥 (empty) percent,pan Tadeusz ma iść w kąt :Data R (empty) (empty) 🔥 (empty) 🔥 (empty) 🔥 🔥 (empty) , # -------------------------------------- @@ -499,6 +555,8 @@ FROM test_basic_operator; Afooew dfoofusion📊🔥 A🔥drew d🔥tafusion📊🔥 NULL NULL Xfoogpeng dfoofusion数据融合 X🔥angpeng d🔥tafusion数据融合 NULL NULL Rfooael dfoofusionДатаФусион R🔥phael d🔥tafusionДатаФусион NULL NULL +ufoor_score ufoość core u🔥der_score u🔥 iść core NULL NULL +pfooent pfooTadeusz ma iść w kąt p🔥rcent p🔥n Tadeusz ma iść w kąt NULL NULL NULL NULL NULL NULL NULL NULL # -------------------------------------- @@ -518,6 +576,8 @@ FROM test_basic_operator; Andrew Andrew NULL datafusion📊bar NULL datafusion📊bar Xiangpeng bar NULL bar NULL datafusion数据融合 Raphael baraphael NULL datafusionДатbarион NULL datafusionДатаФусион +under_score under_score NULL un iść core NULL un iść core +percent percent NULL pan Tadeusz ma iść w kąt NULL pan Tadeusz ma iść w kąt NULL NULL NULL NULL NULL NULL # -------------------------------------- @@ -537,6 +597,8 @@ FROM test_basic_operator; rew (empty) rew n📊🔥 (empty) afusion📊🔥 eng (empty) ngpeng 据融合 (empty) afusion数据融合 ael (empty) hael ион (empty) afusionДатаФусион +ore (empty) er_score ore (empty) iść core +ent (empty) cent kąt (empty) Tadeusz ma iść w kąt NULL NULL NULL NULL NULL NULL # -------------------------------------- @@ -557,6 +619,8 @@ FROM test_basic_operator; And (empty) And dat (empty) datafusio Xia (empty) Xiangp dat (empty) datafusion数 Rap (empty) Raph dat (empty) datafusionДатаФус +und (empty) under_sc un (empty) un iść c +per (empty) perc pan (empty) pan Tadeusz ma iść w NULL NULL NULL NULL NULL NULL # -------------------------------------- @@ -574,6 +638,8 @@ FROM test_basic_operator; Andrew Andrew datafusion📊🔥 
datafusion📊🔥 Xi Xiangpeng datafusion数据融合 datafusion数据融合 R Raph datafusionД datafusionДат +under_score under_score un iść core un iść core +percent percent pan Tadeusz ma iść w kąt pan Tadeusz ma iść w kąt NULL NULL NULL NULL # -------------------------------------- @@ -591,6 +657,8 @@ FROM test_basic_operator; 0 1 0 1 0 2 0 2 0 3 0 3 +0 0 0 0 +0 0 0 0 NULL NULL NULL NULL # -------------------------------------- @@ -610,6 +678,8 @@ FROM test_basic_operator; Andrewfoo Andrew🔥 datafusion📊🔥foo datafusion📊🔥🔥 Xiangpengfoo Xiangpeng🔥 datafusion数据融合foo datafusion数据融合🔥 Raphaelfoo Raphael🔥 datafusionДатаФусионfoo datafusionДатаФусион🔥 +under_scorefoo under_score🔥 un iść corefoo un iść core🔥 +percentfoo percent🔥 pan Tadeusz ma iść w kątfoo pan Tadeusz ma iść w kąt🔥 NULL NULL NULL NULL # || same type (column1 has null, so also tests NULL || NULL) @@ -625,6 +695,8 @@ FROM test_basic_operator; AndrewX Andrew🔥 datafusion📊🔥X datafusion📊🔥🔥 XiangpengXiangpeng Xiangpengdatafusion数据融合 datafusion数据融合Xiangpeng datafusion数据融合datafusion数据融合 RaphaelR RaphaelаФус datafusionДатаФусионR datafusionДатаФусионаФус +under_scoreun_____core under_scorechrząszcz na łące w 東京都 un iść coreun_____core un iść corechrząszcz na łące w 東京都 +percentp%t percentPan Tadeusz ma frunąć stąd w kąt pan Tadeusz ma iść w kątp%t pan Tadeusz ma iść w kątPan Tadeusz ma frunąć stąd w kąt NULL NULL NULL NULL # -------------------------------------- @@ -640,6 +712,8 @@ FROM test_basic_operator; false false true false false true +false false +false false NULL NULL query BB @@ -651,6 +725,8 @@ FROM test_basic_operator; true false false false false true +false false +false false NULL NULL query BB @@ -662,6 +738,8 @@ FROM test_basic_operator; true true true false true true +true true +true true NULL NULL query BB @@ -673,6 +751,8 @@ FROM test_basic_operator; true true false false true true +true true +true true NULL NULL # -------------------------------------- @@ -691,6 +771,8 @@ from test_basic_operator; Andrew nice Andrew and X datafusion📊🔥 cool datafusion📊🔥 and 🔥 Andrew 🔥 datafusion📊🔥 Xiangpeng nice Xiangpeng and Xiangpeng datafusion数据融合 cool datafusion数据融合 and datafusion数据融合 Xiangpeng 🔥 datafusion数据融合 Raphael nice Raphael and R datafusionДатаФусион cool datafusionДатаФусион and аФус Raphael 🔥 datafusionДатаФусион +under_score nice under_score and un_____core un iść core cool un iść core and chrząszcz na łące w 東京都 under_score 🔥 un iść core +percent nice percent and p%t pan Tadeusz ma iść w kąt cool pan Tadeusz ma iść w kąt and Pan Tadeusz ma frunąć stąd w kąt percent 🔥 pan Tadeusz ma iść w kąt NULL NULL NULL NULL NULL # -------------------------------------- @@ -725,6 +807,8 @@ from test_basic_operator; Andrew datafusion📊🔥 false false false false Xiangpeng datafusion数据融合 false false false false Raphael datafusionДатаФусион false false false false +under_score un iść core false false false false +percent pan Tadeusz ma iść w kąt false false false false NULL NULL NULL NULL NULL NULL # -------------------------------------- @@ -741,6 +825,8 @@ FROM 6 12 9 14 7 20 +11 11 +7 24 NULL NULL # -------------------------------------- @@ -758,6 +844,8 @@ FROM test_basic_operator; true true NULL NULL false true NULL NULL false true NULL NULL +false false NULL NULL +false false NULL NULL NULL NULL NULL NULL # -------------------------------------- @@ -775,6 +863,8 @@ FROM test_basic_operator; true false NULL NULL false false NULL NULL false true NULL NULL +false false NULL NULL +false false NULL NULL NULL NULL NULL NULL # 
-------------------------------------- @@ -792,6 +882,8 @@ FROM test_basic_operator; 0 4 NULL NULL 7 0 NULL NULL 6 10 NULL NULL +8 13 NULL NULL +6 19 NULL NULL NULL NULL NULL NULL # -------------------------------------- @@ -809,6 +901,8 @@ FROM test_basic_operator; xxxxxxxxxxxxxxAndrew NULL 🔥🔥🔥🔥🔥🔥🔥🔥datafusion📊🔥 NULL xxxxxxxxxxxXiangpeng NULL 🔥🔥🔥🔥🔥🔥datafusion数据融合 NULL xxxxxxxxxxxxxRaphael NULL datafusionДатаФусион NULL +xxxxxxxxxunder_score NULL 🔥🔥🔥🔥🔥🔥🔥🔥🔥un iść core NULL +xxxxxxxxxxxxxpercent NULL pan Tadeusz ma iść w NULL NULL NULL NULL NULL query TT @@ -820,6 +914,8 @@ FROM test_basic_operator; Andrew datafusion📊🔥 Xiangpeng datafusion数据融合 Raphael datafusionДатаФусион + under_score un iść core + percent pan Tadeusz ma iść w NULL NULL # -------------------------------------- @@ -837,6 +933,8 @@ FROM test_basic_operator; Andrewxxxxxxxxxxxxxx NULL datafusion📊🔥🔥🔥🔥🔥🔥🔥🔥🔥 NULL Xiangpengxxxxxxxxxxx NULL datafusion数据融合🔥🔥🔥🔥🔥🔥 NULL Raphaelxxxxxxxxxxxxx NULL datafusionДатаФусион NULL +under_scorexxxxxxxxx NULL un iść core🔥🔥🔥🔥🔥🔥🔥🔥🔥 NULL +percentxxxxxxxxxxxxx NULL pan Tadeusz ma iść w NULL NULL NULL NULL NULL query TT @@ -845,9 +943,11 @@ SELECT RPAD(unicode_1, 20) FROM test_basic_operator; ---- -Andrew datafusion📊🔥 -Xiangpeng datafusion数据融合 -Raphael datafusionДатаФусион +Andrew datafusion📊🔥 +Xiangpeng datafusion数据融合 +Raphael datafusionДатаФусион +under_score un iść core +percent pan Tadeusz ma iść w NULL NULL # -------------------------------------- @@ -871,6 +971,8 @@ SELECT false false NULL NULL true false NULL NULL true false NULL NULL true false NULL NULL false true NULL NULL false true NULL NULL +false false NULL NULL false false NULL NULL +false false NULL NULL false false NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL # -------------------------------------- @@ -895,6 +997,8 @@ NULL NULL NULL NULL [An] NULL NULL NULL [an] NULL NULL NULL [an] NULL NULL NULL NULL NULL NULL NULL NULL [таФ] NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL # -------------------------------------- # Test REPEAT @@ -909,6 +1013,8 @@ FROM test_basic_operator; AndrewAndrewAndrew datafusion📊🔥datafusion📊🔥datafusion📊🔥 XiangpengXiangpengXiangpeng datafusion数据融合datafusion数据融合datafusion数据融合 RaphaelRaphaelRaphael datafusionДатаФусионdatafusionДатаФусионdatafusionДатаФусион +under_scoreunder_scoreunder_score un iść coreun iść coreun iść core +percentpercentpercent pan Tadeusz ma iść w kątpan Tadeusz ma iść w kątpan Tadeusz ma iść w kąt NULL NULL # -------------------------------------- @@ -928,6 +1034,8 @@ FROM test_basic_operator; Andr w NULL datafusion📊🔥 (empty) NULL Xiangp ng NULL datafusion数据融合 (empty) NULL Rapha l NULL datafusionДатаФус он NULL +und r_scor NULL un iść core (empty) NULL +p rc NULL pan Tadeusz ma iść w kąt (empty) NULL NULL NULL NULL NULL NULL NULL # -------------------------------------- @@ -943,6 +1051,8 @@ FROM test_basic_operator; werdnA 🔥📊noisufatad gnepgnaiX 合融据数noisufatad leahpaR ноисуФатаДnoisufatad +erocs_rednu eroc ćśi nu +tnecrep tąk w ćśi am zsuedaT nap NULL NULL # -------------------------------------- @@ -962,6 +1072,8 @@ FROM test_basic_operator; 5 0 NULL 0 0 NULL 7 3 NULL 0 0 NULL 6 0 NULL 18 18 NULL +4 0 NULL 0 0 NULL +2 0 NULL 0 0 NULL NULL NULL NULL NULL NULL NULL # -------------------------------------- @@ -981,4 +1093,6 @@ FROM test_basic_operator; Andr Andrew NULL datafusion📊🔥 datafusion📊🔥 NULL Xiangp Xi NULL datafusion数据融合 datafusion数 NULL Rapha Raphael NULL datafusionДатаФус datafusionДатаФусион NULL 
+und under_score NULL un iść core un iść core NULL +p percent NULL pan Tadeusz ma iść w kąt pan Tadeusz ma iść w kąt NULL NULL NULL NULL NULL NULL NULL From d36c229e03551f55dada8be41fc4646d897c7b78 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Wed, 30 Oct 2024 12:32:29 -0400 Subject: [PATCH 03/32] Minor: improve testing docs, mention `cargo nextest` (#13160) --- docs/source/contributor-guide/testing.md | 26 +++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/docs/source/contributor-guide/testing.md b/docs/source/contributor-guide/testing.md index 90e39c0057c1..b955b09050b3 100644 --- a/docs/source/contributor-guide/testing.md +++ b/docs/source/contributor-guide/testing.md @@ -21,11 +21,27 @@ Tests are critical to ensure that DataFusion is working properly and is not accidentally broken during refactorings. All new features -should have test coverage. +should have test coverage and the entire test suite is run as part of CI. -DataFusion has several levels of tests in its [Test -Pyramid](https://martinfowler.com/articles/practical-test-pyramid.html) -and tries to follow the Rust standard [Testing Organization](https://doc.rust-lang.org/book/ch11-03-test-organization.html) in the The Book. +DataFusion has several levels of tests in its [Test Pyramid] and tries to follow +the Rust standard [Testing Organization] described in [The Book]. + +Run tests using `cargo`: + +```shell +cargo test +``` + +You can also use other runners such as [cargo-nextest]. + +```shell +cargo nextest run +``` + +[test pyramid]: https://martinfowler.com/articles/practical-test-pyramid.html +[testing organization]: https://doc.rust-lang.org/book/ch11-03-test-organization.html +[the book]: https://doc.rust-lang.org/book/ +[cargo-nextest]: https://nexte.st/ ## Unit tests @@ -34,7 +50,7 @@ The [test_util](https://github.com/apache/datafusion/tree/main/datafusion/common ## sqllogictests Tests -DataFusion's SQL implementation is tested using [sqllogictest](https://github.com/apache/datafusion/tree/main/datafusion/sqllogictest) which are run like any other Rust test using `cargo test --test sqllogictests`. +DataFusion's SQL implementation is tested using [sqllogictest](https://github.com/apache/datafusion/tree/main/datafusion/sqllogictest) which are run like other tests using `cargo test --test sqllogictests`. `sqllogictests` tests may be less convenient for new contributors who are familiar with writing `.rs` tests as they require learning another tool. However, `sqllogictest` based tests are much easier to develop and maintain as they 1) do not require a slow recompile/link cycle and 2) can be automatically updated via `cargo test --test sqllogictests -- --complete`. 
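For example, when iterating on a single `.slt` file, the documented `cargo test --test sqllogictests` invocation and its `--complete` flag are typically combined as sketched below; the file-name filter argument is an assumed runner behavior rather than something stated in the diff above.

```shell
# Run the full sqllogictest suite (documented above)
cargo test --test sqllogictests

# Run a single test file by name filter (assumed runner behavior)
cargo test --test sqllogictests -- aggregate

# Auto-update expected results after an intentional change (documented above)
cargo test --test sqllogictests -- --complete
```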
From eb558de1e05ae11a1f14d60239336e7853c4f972 Mon Sep 17 00:00:00 2001 From: Jonathan Chen Date: Wed, 30 Oct 2024 12:34:42 -0400 Subject: [PATCH 04/32] minor: Update HOWTO to help with updating new docs (#13172) * update docs * small fix * Update docs/source/contributor-guide/howtos.md * Update docs/source/contributor-guide/howtos.md --------- Co-authored-by: Andrew Lamb --- docs/source/contributor-guide/howtos.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/docs/source/contributor-guide/howtos.md b/docs/source/contributor-guide/howtos.md index 4e52a2fbcaa6..f105ab2c42db 100644 --- a/docs/source/contributor-guide/howtos.md +++ b/docs/source/contributor-guide/howtos.md @@ -45,6 +45,8 @@ Below is a checklist of what you need to do to add a new scalar function to Data - In [sqllogictest/test_files], add new `sqllogictest` integration tests where the function is called through SQL against well known data and returns the expected result. - Documentation for `sqllogictest` [here](https://github.com/apache/datafusion/blob/main/datafusion/sqllogictest/README.md) - Add SQL reference documentation [here](https://github.com/apache/datafusion/blob/main/docs/source/user-guide/sql/scalar_functions.md) + - An example of this being done can be seen [here](https://github.com/apache/datafusion/pull/12775) + - Run `./dev/update_function_docs.sh` to update docs [advanced_udf.rs]: https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/advanced_udaf.rs [sqllogictest/test_files]: https://github.com/apache/datafusion/tree/main/datafusion/sqllogictest/test_files @@ -64,6 +66,8 @@ Below is a checklist of what you need to do to add a new aggregate function to D - In [sqllogictest/test_files], add new `sqllogictest` integration tests where the function is called through SQL against well known data and returns the expected result. - Documentation for `sqllogictest` [here](https://github.com/apache/datafusion/blob/main/datafusion/sqllogictest/README.md) - Add SQL reference documentation [here](https://github.com/apache/datafusion/blob/main/docs/source/user-guide/sql/aggregate_functions.md) + - An example of this being done can be seen [here](https://github.com/apache/datafusion/pull/12775) + - Run `./dev/update_function_docs.sh` to update docs ## How to display plans graphically From 63e8e6a2d8f629524ac66ded209283b388a68999 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Wed, 30 Oct 2024 12:42:16 -0400 Subject: [PATCH 05/32] Add config option `skip_physical_aggregate_schema_check ` (#13176) * Add option to skip physical aggregate check * tweak wording * update test --- datafusion/common/src/config.rs | 11 +++++++++++ datafusion/core/src/physical_planner.rs | 8 +++++--- .../sqllogictest/test_files/information_schema.slt | 2 ++ docs/source/user-guide/configs.md | 1 + 4 files changed, 19 insertions(+), 3 deletions(-) diff --git a/datafusion/common/src/config.rs b/datafusion/common/src/config.rs index 15290204fbac..1fa32aefb33f 100644 --- a/datafusion/common/src/config.rs +++ b/datafusion/common/src/config.rs @@ -268,6 +268,17 @@ config_namespace! { /// Defaults to the number of CPU cores on the system pub planning_concurrency: usize, default = num_cpus::get() + /// When set to true, skips verifying that the schema produced by + /// planning the input of `LogicalPlan::Aggregate` exactly matches the + /// schema of the input plan. + /// + /// When set to false, if the schema does not match exactly + /// (including nullability and metadata), a planning error will be raised. 
+ /// + /// This is used to workaround bugs in the planner that are now caught by + /// the new schema verification step. + pub skip_physical_aggregate_schema_check: bool, default = false + /// Specifies the reserved memory for each spillable sort operation to /// facilitate an in-memory merge. /// diff --git a/datafusion/core/src/physical_planner.rs b/datafusion/core/src/physical_planner.rs index ffedc2d6b6ef..5aa702550b08 100644 --- a/datafusion/core/src/physical_planner.rs +++ b/datafusion/core/src/physical_planner.rs @@ -649,14 +649,16 @@ impl DefaultPhysicalPlanner { aggr_expr, .. }) => { + let options = session_state.config().options(); // Initially need to perform the aggregate and then merge the partitions let input_exec = children.one()?; let physical_input_schema = input_exec.schema(); let logical_input_schema = input.as_ref().schema(); - let physical_input_schema_from_logical: Arc = - logical_input_schema.as_ref().clone().into(); + let physical_input_schema_from_logical = logical_input_schema.inner(); - if physical_input_schema != physical_input_schema_from_logical { + if &physical_input_schema != physical_input_schema_from_logical + && !options.execution.skip_physical_aggregate_schema_check + { return internal_err!("Physical input schema should be the same as the one converted from logical input schema."); } diff --git a/datafusion/sqllogictest/test_files/information_schema.slt b/datafusion/sqllogictest/test_files/information_schema.slt index 3630f6c36595..03ab4a090e67 100644 --- a/datafusion/sqllogictest/test_files/information_schema.slt +++ b/datafusion/sqllogictest/test_files/information_schema.slt @@ -210,6 +210,7 @@ datafusion.execution.parquet.writer_version 1.0 datafusion.execution.planning_concurrency 13 datafusion.execution.skip_partial_aggregation_probe_ratio_threshold 0.8 datafusion.execution.skip_partial_aggregation_probe_rows_threshold 100000 +datafusion.execution.skip_physical_aggregate_schema_check false datafusion.execution.soft_max_rows_per_output_file 50000000 datafusion.execution.sort_in_place_threshold_bytes 1048576 datafusion.execution.sort_spill_reservation_bytes 10485760 @@ -302,6 +303,7 @@ datafusion.execution.parquet.writer_version 1.0 (writing) Sets parquet writer ve datafusion.execution.planning_concurrency 13 Fan-out during initial physical planning. This is mostly use to plan `UNION` children in parallel. Defaults to the number of CPU cores on the system datafusion.execution.skip_partial_aggregation_probe_ratio_threshold 0.8 Aggregation ratio (number of distinct groups / number of input rows) threshold for skipping partial aggregation. If the value is greater then partial aggregation will skip aggregation for further input datafusion.execution.skip_partial_aggregation_probe_rows_threshold 100000 Number of input rows partial aggregation partition should process, before aggregation ratio check and trying to switch to skipping aggregation mode +datafusion.execution.skip_physical_aggregate_schema_check false When set to true, skips verifying that the schema produced by planning the input of `LogicalPlan::Aggregate` exactly matches the schema of the input plan. When set to false, if the schema does not match exactly (including nullability and metadata), a planning error will be raised. This is used to workaround bugs in the planner that are now caught by the new schema verification step. datafusion.execution.soft_max_rows_per_output_file 50000000 Target number of rows in output files when writing multiple. 
This is a soft max, so it can be exceeded slightly. There also will be one file smaller than the limit if the total number of rows written is not roughly divisible by the soft max datafusion.execution.sort_in_place_threshold_bytes 1048576 When sorting, below what size should data be concatenated and sorted in a single RecordBatch rather than sorted in batches and merged. datafusion.execution.sort_spill_reservation_bytes 10485760 Specifies the reserved memory for each spillable sort operation to facilitate an in-memory merge. When a sort operation spills to disk, the in-memory data must be sorted and merged before being written to a file. This setting reserves a specific amount of memory for that in-memory sort/merge process. Note: This setting is irrelevant if the sort operation cannot spill (i.e., if there's no `DiskManager` configured). diff --git a/docs/source/user-guide/configs.md b/docs/source/user-guide/configs.md index 91a2e8b4389a..e920189713fc 100644 --- a/docs/source/user-guide/configs.md +++ b/docs/source/user-guide/configs.md @@ -79,6 +79,7 @@ Environment variables are read during `SessionConfig` initialisation so they mus | datafusion.execution.parquet.maximum_parallel_row_group_writers | 1 | (writing) By default parallel parquet writer is tuned for minimum memory usage in a streaming execution plan. You may see a performance benefit when writing large parquet files by increasing maximum_parallel_row_group_writers and maximum_buffered_record_batches_per_stream if your system has idle cores and can tolerate additional memory usage. Boosting these values is likely worthwhile when writing out already in-memory data, such as from a cached data frame. | | datafusion.execution.parquet.maximum_buffered_record_batches_per_stream | 2 | (writing) By default parallel parquet writer is tuned for minimum memory usage in a streaming execution plan. You may see a performance benefit when writing large parquet files by increasing maximum_parallel_row_group_writers and maximum_buffered_record_batches_per_stream if your system has idle cores and can tolerate additional memory usage. Boosting these values is likely worthwhile when writing out already in-memory data, such as from a cached data frame. | | datafusion.execution.planning_concurrency | 0 | Fan-out during initial physical planning. This is mostly use to plan `UNION` children in parallel. Defaults to the number of CPU cores on the system | +| datafusion.execution.skip_physical_aggregate_schema_check | false | When set to true, skips verifying that the schema produced by planning the input of `LogicalPlan::Aggregate` exactly matches the schema of the input plan. When set to false, if the schema does not match exactly (including nullability and metadata), a planning error will be raised. This is used to workaround bugs in the planner that are now caught by the new schema verification step. | | datafusion.execution.sort_spill_reservation_bytes | 10485760 | Specifies the reserved memory for each spillable sort operation to facilitate an in-memory merge. When a sort operation spills to disk, the in-memory data must be sorted and merged before being written to a file. This setting reserves a specific amount of memory for that in-memory sort/merge process. Note: This setting is irrelevant if the sort operation cannot spill (i.e., if there's no `DiskManager` configured). 
| | datafusion.execution.sort_in_place_threshold_bytes | 1048576 | When sorting, below what size should data be concatenated and sorted in a single RecordBatch rather than sorted in batches and merged. | | datafusion.execution.meta_fetch_concurrency | 32 | Number of files to read in parallel when inferring schema and statistics | From 2d7892b6060793e721ce1d66242f485656e0430a Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Wed, 30 Oct 2024 14:20:21 -0400 Subject: [PATCH 06/32] Enable reading `StringViewArray` by default from Parquet (8% improvement for entire ClickBench suite) (#13101) * Read String data as `StringView` by default from Parquet * fix benchmark --- benchmarks/src/bin/external_aggr.rs | 7 +-- benchmarks/src/clickbench.rs | 1 - benchmarks/src/imdb/run.rs | 8 +--- benchmarks/src/tpch/run.rs | 7 --- benchmarks/src/util/options.rs | 5 -- datafusion/common/src/config.rs | 2 +- datafusion/common/src/scalar/mod.rs | 5 ++ datafusion/core/tests/parquet/page_pruning.rs | 5 +- .../sqllogictest/test_files/describe.slt | 4 +- .../sqllogictest/test_files/explain.slt | 12 ++--- .../test_files/information_schema.slt | 4 +- datafusion/sqllogictest/test_files/map.slt | 2 +- .../sqllogictest/test_files/parquet.slt | 48 +++++++++---------- docs/source/user-guide/configs.md | 2 +- 14 files changed, 47 insertions(+), 65 deletions(-) diff --git a/benchmarks/src/bin/external_aggr.rs b/benchmarks/src/bin/external_aggr.rs index 1bc74e22ccfa..6438593a20a0 100644 --- a/benchmarks/src/bin/external_aggr.rs +++ b/benchmarks/src/bin/external_aggr.rs @@ -193,12 +193,7 @@ impl ExternalAggrConfig { ) -> Result> { let query_name = format!("Q{query_id}({})", human_readable_size(mem_limit as usize)); - let mut config = self.common.config(); - config - .options_mut() - .execution - .parquet - .schema_force_view_types = self.common.force_view_types; + let config = self.common.config(); let runtime_config = RuntimeConfig::new() .with_memory_pool(Arc::new(FairSpillPool::new(mem_limit as usize))) .build_arc()?; diff --git a/benchmarks/src/clickbench.rs b/benchmarks/src/clickbench.rs index 3564ae82585a..46dd4b18825b 100644 --- a/benchmarks/src/clickbench.rs +++ b/benchmarks/src/clickbench.rs @@ -119,7 +119,6 @@ impl RunOpt { let mut config = self.common.config(); { let parquet_options = &mut config.options_mut().execution.parquet; - parquet_options.schema_force_view_types = self.common.force_view_types; // The hits_partitioned dataset specifies string columns // as binary due to how it was written. 
Force it to strings parquet_options.binary_as_string = true; diff --git a/benchmarks/src/imdb/run.rs b/benchmarks/src/imdb/run.rs index fd4960606110..47c356990881 100644 --- a/benchmarks/src/imdb/run.rs +++ b/benchmarks/src/imdb/run.rs @@ -305,11 +305,7 @@ impl RunOpt { .config() .with_collect_statistics(!self.disable_statistics); config.options_mut().optimizer.prefer_hash_join = self.prefer_hash_join; - config - .options_mut() - .execution - .parquet - .schema_force_view_types = self.common.force_view_types; + let ctx = SessionContext::new_with_config(config); // register tables @@ -517,7 +513,6 @@ mod tests { partitions: Some(2), batch_size: 8192, debug: false, - force_view_types: false, }; let opt = RunOpt { query: Some(query), @@ -551,7 +546,6 @@ mod tests { partitions: Some(2), batch_size: 8192, debug: false, - force_view_types: false, }; let opt = RunOpt { query: Some(query), diff --git a/benchmarks/src/tpch/run.rs b/benchmarks/src/tpch/run.rs index e316a66e1c60..9ff1f72d8606 100644 --- a/benchmarks/src/tpch/run.rs +++ b/benchmarks/src/tpch/run.rs @@ -120,11 +120,6 @@ impl RunOpt { .config() .with_collect_statistics(!self.disable_statistics); config.options_mut().optimizer.prefer_hash_join = self.prefer_hash_join; - config - .options_mut() - .execution - .parquet - .schema_force_view_types = self.common.force_view_types; let ctx = SessionContext::new_with_config(config); // register tables @@ -345,7 +340,6 @@ mod tests { partitions: Some(2), batch_size: 8192, debug: false, - force_view_types: false, }; let opt = RunOpt { query: Some(query), @@ -379,7 +373,6 @@ mod tests { partitions: Some(2), batch_size: 8192, debug: false, - force_view_types: false, }; let opt = RunOpt { query: Some(query), diff --git a/benchmarks/src/util/options.rs b/benchmarks/src/util/options.rs index efdb074b2461..b9398e5b522f 100644 --- a/benchmarks/src/util/options.rs +++ b/benchmarks/src/util/options.rs @@ -37,11 +37,6 @@ pub struct CommonOpt { /// Activate debug mode to see more details #[structopt(short, long)] pub debug: bool, - - /// If true, will use StringView/BinaryViewArray instead of String/BinaryArray - /// when reading ParquetFiles - #[structopt(long)] - pub force_view_types: bool, } impl CommonOpt { diff --git a/datafusion/common/src/config.rs b/datafusion/common/src/config.rs index 1fa32aefb33f..336513035036 100644 --- a/datafusion/common/src/config.rs +++ b/datafusion/common/src/config.rs @@ -399,7 +399,7 @@ config_namespace! { /// (reading) If true, parquet reader will read columns of `Utf8/Utf8Large` with `Utf8View`, /// and `Binary/BinaryLarge` with `BinaryView`. - pub schema_force_view_types: bool, default = false + pub schema_force_view_types: bool, default = true /// (reading) If true, parquet reader will read columns of /// `Binary/LargeBinary` with `Utf8`, and `BinaryView` with `Utf8View`. 
diff --git a/datafusion/common/src/scalar/mod.rs b/datafusion/common/src/scalar/mod.rs index 7a1eaa2ad65b..5595f4f9fa70 100644 --- a/datafusion/common/src/scalar/mod.rs +++ b/datafusion/common/src/scalar/mod.rs @@ -980,6 +980,11 @@ impl ScalarValue { ScalarValue::from(val.into()) } + /// Returns a [`ScalarValue::Utf8View`] representing `val` + pub fn new_utf8view(val: impl Into) -> Self { + ScalarValue::Utf8View(Some(val.into())) + } + /// Returns a [`ScalarValue::IntervalYearMonth`] representing /// `years` years and `months` months pub fn new_interval_ym(years: i32, months: i32) -> Self { diff --git a/datafusion/core/tests/parquet/page_pruning.rs b/datafusion/core/tests/parquet/page_pruning.rs index 15efd4bcd9dd..d201ed3a841f 100644 --- a/datafusion/core/tests/parquet/page_pruning.rs +++ b/datafusion/core/tests/parquet/page_pruning.rs @@ -149,8 +149,9 @@ async fn page_index_filter_one_col() { let session_ctx = SessionContext::new(); let task_ctx = session_ctx.task_ctx(); - // 5.create filter date_string_col == 1; - let filter = col("date_string_col").eq(lit("01/01/09")); + // 5.create filter date_string_col == "01/01/09"`; + // Note this test doesn't apply type coercion so the literal must match the actual view type + let filter = col("date_string_col").eq(lit(ScalarValue::new_utf8view("01/01/09"))); let parquet_exec = get_parquet_exec(&state, filter).await; let mut results = parquet_exec.execute(0, task_ctx.clone()).unwrap(); let batch = results.next().await.unwrap().unwrap(); diff --git a/datafusion/sqllogictest/test_files/describe.slt b/datafusion/sqllogictest/test_files/describe.slt index 077e8e6474d1..e4cb30628eec 100644 --- a/datafusion/sqllogictest/test_files/describe.slt +++ b/datafusion/sqllogictest/test_files/describe.slt @@ -81,8 +81,8 @@ int_col Int32 YES bigint_col Int64 YES float_col Float32 YES double_col Float64 YES -date_string_col Utf8 YES -string_col Utf8 YES +date_string_col Utf8View YES +string_col Utf8View YES timestamp_col Timestamp(Nanosecond, None) YES year Int32 YES month Int32 YES diff --git a/datafusion/sqllogictest/test_files/explain.slt b/datafusion/sqllogictest/test_files/explain.slt index 1340fd490e06..54658f36ca14 100644 --- a/datafusion/sqllogictest/test_files/explain.slt +++ b/datafusion/sqllogictest/test_files/explain.slt @@ -305,8 +305,8 @@ initial_physical_plan 01)GlobalLimitExec: skip=0, fetch=10, statistics=[Rows=Exact(8), Bytes=Absent, [(Col[0]:),(Col[1]:),(Col[2]:),(Col[3]:),(Col[4]:),(Col[5]:),(Col[6]:),(Col[7]:),(Col[8]:),(Col[9]:),(Col[10]:)]] 02)--ParquetExec: file_groups={1 group: [[WORKSPACE_ROOT/parquet-testing/data/alltypes_plain.parquet]]}, projection=[id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col], limit=10, statistics=[Rows=Exact(8), Bytes=Absent, [(Col[0]:),(Col[1]:),(Col[2]:),(Col[3]:),(Col[4]:),(Col[5]:),(Col[6]:),(Col[7]:),(Col[8]:),(Col[9]:),(Col[10]:)]] initial_physical_plan_with_schema -01)GlobalLimitExec: skip=0, fetch=10, schema=[id:Int32;N, bool_col:Boolean;N, tinyint_col:Int32;N, smallint_col:Int32;N, int_col:Int32;N, bigint_col:Int64;N, float_col:Float32;N, double_col:Float64;N, date_string_col:Binary;N, string_col:Binary;N, timestamp_col:Timestamp(Nanosecond, None);N] -02)--ParquetExec: file_groups={1 group: [[WORKSPACE_ROOT/parquet-testing/data/alltypes_plain.parquet]]}, projection=[id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col], limit=10, schema=[id:Int32;N, 
bool_col:Boolean;N, tinyint_col:Int32;N, smallint_col:Int32;N, int_col:Int32;N, bigint_col:Int64;N, float_col:Float32;N, double_col:Float64;N, date_string_col:Binary;N, string_col:Binary;N, timestamp_col:Timestamp(Nanosecond, None);N] +01)GlobalLimitExec: skip=0, fetch=10, schema=[id:Int32;N, bool_col:Boolean;N, tinyint_col:Int32;N, smallint_col:Int32;N, int_col:Int32;N, bigint_col:Int64;N, float_col:Float32;N, double_col:Float64;N, date_string_col:BinaryView;N, string_col:BinaryView;N, timestamp_col:Timestamp(Nanosecond, None);N] +02)--ParquetExec: file_groups={1 group: [[WORKSPACE_ROOT/parquet-testing/data/alltypes_plain.parquet]]}, projection=[id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col], limit=10, schema=[id:Int32;N, bool_col:Boolean;N, tinyint_col:Int32;N, smallint_col:Int32;N, int_col:Int32;N, bigint_col:Int64;N, float_col:Float32;N, double_col:Float64;N, date_string_col:BinaryView;N, string_col:BinaryView;N, timestamp_col:Timestamp(Nanosecond, None);N] physical_plan after OutputRequirements 01)OutputRequirementExec, statistics=[Rows=Exact(8), Bytes=Absent, [(Col[0]:),(Col[1]:),(Col[2]:),(Col[3]:),(Col[4]:),(Col[5]:),(Col[6]:),(Col[7]:),(Col[8]:),(Col[9]:),(Col[10]:)]] 02)--GlobalLimitExec: skip=0, fetch=10, statistics=[Rows=Exact(8), Bytes=Absent, [(Col[0]:),(Col[1]:),(Col[2]:),(Col[3]:),(Col[4]:),(Col[5]:),(Col[6]:),(Col[7]:),(Col[8]:),(Col[9]:),(Col[10]:)]] @@ -328,7 +328,7 @@ physical_plan after ProjectionPushdown SAME TEXT AS ABOVE physical_plan after LimitPushdown ParquetExec: file_groups={1 group: [[WORKSPACE_ROOT/parquet-testing/data/alltypes_plain.parquet]]}, projection=[id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col], limit=10, statistics=[Rows=Exact(8), Bytes=Absent, [(Col[0]:),(Col[1]:),(Col[2]:),(Col[3]:),(Col[4]:),(Col[5]:),(Col[6]:),(Col[7]:),(Col[8]:),(Col[9]:),(Col[10]:)]] physical_plan after SanityCheckPlan SAME TEXT AS ABOVE physical_plan ParquetExec: file_groups={1 group: [[WORKSPACE_ROOT/parquet-testing/data/alltypes_plain.parquet]]}, projection=[id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col], limit=10, statistics=[Rows=Exact(8), Bytes=Absent, [(Col[0]:),(Col[1]:),(Col[2]:),(Col[3]:),(Col[4]:),(Col[5]:),(Col[6]:),(Col[7]:),(Col[8]:),(Col[9]:),(Col[10]:)]] -physical_plan_with_schema ParquetExec: file_groups={1 group: [[WORKSPACE_ROOT/parquet-testing/data/alltypes_plain.parquet]]}, projection=[id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col], limit=10, schema=[id:Int32;N, bool_col:Boolean;N, tinyint_col:Int32;N, smallint_col:Int32;N, int_col:Int32;N, bigint_col:Int64;N, float_col:Float32;N, double_col:Float64;N, date_string_col:Binary;N, string_col:Binary;N, timestamp_col:Timestamp(Nanosecond, None);N] +physical_plan_with_schema ParquetExec: file_groups={1 group: [[WORKSPACE_ROOT/parquet-testing/data/alltypes_plain.parquet]]}, projection=[id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col], limit=10, schema=[id:Int32;N, bool_col:Boolean;N, tinyint_col:Int32;N, smallint_col:Int32;N, int_col:Int32;N, bigint_col:Int64;N, float_col:Float32;N, double_col:Float64;N, date_string_col:BinaryView;N, string_col:BinaryView;N, timestamp_col:Timestamp(Nanosecond, None);N] statement 
ok @@ -345,8 +345,8 @@ initial_physical_plan_with_stats 01)GlobalLimitExec: skip=0, fetch=10, statistics=[Rows=Exact(8), Bytes=Absent, [(Col[0]:),(Col[1]:),(Col[2]:),(Col[3]:),(Col[4]:),(Col[5]:),(Col[6]:),(Col[7]:),(Col[8]:),(Col[9]:),(Col[10]:)]] 02)--ParquetExec: file_groups={1 group: [[WORKSPACE_ROOT/parquet-testing/data/alltypes_plain.parquet]]}, projection=[id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col], limit=10, statistics=[Rows=Exact(8), Bytes=Absent, [(Col[0]:),(Col[1]:),(Col[2]:),(Col[3]:),(Col[4]:),(Col[5]:),(Col[6]:),(Col[7]:),(Col[8]:),(Col[9]:),(Col[10]:)]] initial_physical_plan_with_schema -01)GlobalLimitExec: skip=0, fetch=10, schema=[id:Int32;N, bool_col:Boolean;N, tinyint_col:Int32;N, smallint_col:Int32;N, int_col:Int32;N, bigint_col:Int64;N, float_col:Float32;N, double_col:Float64;N, date_string_col:Binary;N, string_col:Binary;N, timestamp_col:Timestamp(Nanosecond, None);N] -02)--ParquetExec: file_groups={1 group: [[WORKSPACE_ROOT/parquet-testing/data/alltypes_plain.parquet]]}, projection=[id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col], limit=10, schema=[id:Int32;N, bool_col:Boolean;N, tinyint_col:Int32;N, smallint_col:Int32;N, int_col:Int32;N, bigint_col:Int64;N, float_col:Float32;N, double_col:Float64;N, date_string_col:Binary;N, string_col:Binary;N, timestamp_col:Timestamp(Nanosecond, None);N] +01)GlobalLimitExec: skip=0, fetch=10, schema=[id:Int32;N, bool_col:Boolean;N, tinyint_col:Int32;N, smallint_col:Int32;N, int_col:Int32;N, bigint_col:Int64;N, float_col:Float32;N, double_col:Float64;N, date_string_col:BinaryView;N, string_col:BinaryView;N, timestamp_col:Timestamp(Nanosecond, None);N] +02)--ParquetExec: file_groups={1 group: [[WORKSPACE_ROOT/parquet-testing/data/alltypes_plain.parquet]]}, projection=[id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col], limit=10, schema=[id:Int32;N, bool_col:Boolean;N, tinyint_col:Int32;N, smallint_col:Int32;N, int_col:Int32;N, bigint_col:Int64;N, float_col:Float32;N, double_col:Float64;N, date_string_col:BinaryView;N, string_col:BinaryView;N, timestamp_col:Timestamp(Nanosecond, None);N] physical_plan after OutputRequirements 01)OutputRequirementExec 02)--GlobalLimitExec: skip=0, fetch=10 @@ -369,7 +369,7 @@ physical_plan after LimitPushdown ParquetExec: file_groups={1 group: [[WORKSPACE physical_plan after SanityCheckPlan SAME TEXT AS ABOVE physical_plan ParquetExec: file_groups={1 group: [[WORKSPACE_ROOT/parquet-testing/data/alltypes_plain.parquet]]}, projection=[id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col], limit=10 physical_plan_with_stats ParquetExec: file_groups={1 group: [[WORKSPACE_ROOT/parquet-testing/data/alltypes_plain.parquet]]}, projection=[id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col], limit=10, statistics=[Rows=Exact(8), Bytes=Absent, [(Col[0]:),(Col[1]:),(Col[2]:),(Col[3]:),(Col[4]:),(Col[5]:),(Col[6]:),(Col[7]:),(Col[8]:),(Col[9]:),(Col[10]:)]] -physical_plan_with_schema ParquetExec: file_groups={1 group: [[WORKSPACE_ROOT/parquet-testing/data/alltypes_plain.parquet]]}, projection=[id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col], 
limit=10, schema=[id:Int32;N, bool_col:Boolean;N, tinyint_col:Int32;N, smallint_col:Int32;N, int_col:Int32;N, bigint_col:Int64;N, float_col:Float32;N, double_col:Float64;N, date_string_col:Binary;N, string_col:Binary;N, timestamp_col:Timestamp(Nanosecond, None);N] +physical_plan_with_schema ParquetExec: file_groups={1 group: [[WORKSPACE_ROOT/parquet-testing/data/alltypes_plain.parquet]]}, projection=[id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col], limit=10, schema=[id:Int32;N, bool_col:Boolean;N, tinyint_col:Int32;N, smallint_col:Int32;N, int_col:Int32;N, bigint_col:Int64;N, float_col:Float32;N, double_col:Float64;N, date_string_col:BinaryView;N, string_col:BinaryView;N, timestamp_col:Timestamp(Nanosecond, None);N] statement ok diff --git a/datafusion/sqllogictest/test_files/information_schema.slt b/datafusion/sqllogictest/test_files/information_schema.slt index 03ab4a090e67..84d18233d572 100644 --- a/datafusion/sqllogictest/test_files/information_schema.slt +++ b/datafusion/sqllogictest/test_files/information_schema.slt @@ -202,7 +202,7 @@ datafusion.execution.parquet.metadata_size_hint NULL datafusion.execution.parquet.pruning true datafusion.execution.parquet.pushdown_filters false datafusion.execution.parquet.reorder_filters false -datafusion.execution.parquet.schema_force_view_types false +datafusion.execution.parquet.schema_force_view_types true datafusion.execution.parquet.skip_metadata true datafusion.execution.parquet.statistics_enabled page datafusion.execution.parquet.write_batch_size 1024 @@ -295,7 +295,7 @@ datafusion.execution.parquet.metadata_size_hint NULL (reading) If specified, the datafusion.execution.parquet.pruning true (reading) If true, the parquet reader attempts to skip entire row groups based on the predicate in the query and the metadata (min/max values) stored in the parquet file datafusion.execution.parquet.pushdown_filters false (reading) If true, filter expressions are be applied during the parquet decoding operation to reduce the number of rows decoded. This optimization is sometimes called "late materialization". datafusion.execution.parquet.reorder_filters false (reading) If true, filter expressions evaluated during the parquet decoding operation will be reordered heuristically to minimize the cost of evaluation. If false, the filters are applied in the same order as written in the query -datafusion.execution.parquet.schema_force_view_types false (reading) If true, parquet reader will read columns of `Utf8/Utf8Large` with `Utf8View`, and `Binary/BinaryLarge` with `BinaryView`. +datafusion.execution.parquet.schema_force_view_types true (reading) If true, parquet reader will read columns of `Utf8/Utf8Large` with `Utf8View`, and `Binary/BinaryLarge` with `BinaryView`. datafusion.execution.parquet.skip_metadata true (reading) If true, the parquet reader skip the optional embedded metadata that may be in the file Schema. This setting can help avoid schema conflicts when querying multiple parquet files with schemas containing compatible types but different metadata datafusion.execution.parquet.statistics_enabled page (writing) Sets if statistics are enabled for any column Valid values are: "none", "chunk", and "page" These values are not case sensitive. 
If NULL, uses default parquet writer setting datafusion.execution.parquet.write_batch_size 1024 (writing) Sets write_batch_size in bytes diff --git a/datafusion/sqllogictest/test_files/map.slt b/datafusion/sqllogictest/test_files/map.slt index 726de75b5141..ed4f999aa16c 100644 --- a/datafusion/sqllogictest/test_files/map.slt +++ b/datafusion/sqllogictest/test_files/map.slt @@ -42,7 +42,7 @@ describe data; ---- ints Map(Field { name: "entries", data_type: Struct([Field { name: "key", data_type: Utf8, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "value", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }]), nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, false) NO strings Map(Field { name: "entries", data_type: Struct([Field { name: "key", data_type: Utf8, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "value", data_type: Utf8, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }]), nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, false) NO -timestamp Utf8 NO +timestamp Utf8View NO query ??T SELECT * FROM data ORDER by ints['bytes'] DESC LIMIT 10; diff --git a/datafusion/sqllogictest/test_files/parquet.slt b/datafusion/sqllogictest/test_files/parquet.slt index bf68a1851137..656cfcbe076d 100644 --- a/datafusion/sqllogictest/test_files/parquet.slt +++ b/datafusion/sqllogictest/test_files/parquet.slt @@ -384,15 +384,15 @@ select arrow_typeof(binaryview_col), binaryview_col FROM binary_as_string_default; ---- -Binary 616161 Binary 616161 Binary 616161 -Binary 626262 Binary 626262 Binary 626262 -Binary 636363 Binary 636363 Binary 636363 -Binary 646464 Binary 646464 Binary 646464 -Binary 656565 Binary 656565 Binary 656565 -Binary 666666 Binary 666666 Binary 666666 -Binary 676767 Binary 676767 Binary 676767 -Binary 686868 Binary 686868 Binary 686868 -Binary 696969 Binary 696969 Binary 696969 +BinaryView 616161 BinaryView 616161 BinaryView 616161 +BinaryView 626262 BinaryView 626262 BinaryView 626262 +BinaryView 636363 BinaryView 636363 BinaryView 636363 +BinaryView 646464 BinaryView 646464 BinaryView 646464 +BinaryView 656565 BinaryView 656565 BinaryView 656565 +BinaryView 666666 BinaryView 666666 BinaryView 666666 +BinaryView 676767 BinaryView 676767 BinaryView 676767 +BinaryView 686868 BinaryView 686868 BinaryView 686868 +BinaryView 696969 BinaryView 696969 BinaryView 696969 # Run an explain plan to show the cast happens in the plan (a CAST is needed for the predicates) query TT @@ -405,13 +405,13 @@ EXPLAIN binaryview_col LIKE '%a%'; ---- logical_plan -01)Filter: CAST(binary_as_string_default.binary_col AS Utf8) LIKE Utf8("%a%") AND CAST(binary_as_string_default.largebinary_col AS Utf8) LIKE Utf8("%a%") AND CAST(binary_as_string_default.binaryview_col AS Utf8) LIKE Utf8("%a%") -02)--TableScan: binary_as_string_default projection=[binary_col, largebinary_col, binaryview_col], partial_filters=[CAST(binary_as_string_default.binary_col AS Utf8) LIKE Utf8("%a%"), CAST(binary_as_string_default.largebinary_col AS Utf8) LIKE Utf8("%a%"), CAST(binary_as_string_default.binaryview_col AS Utf8) LIKE Utf8("%a%")] +01)Filter: CAST(binary_as_string_default.binary_col AS Utf8View) LIKE Utf8View("%a%") AND CAST(binary_as_string_default.largebinary_col AS Utf8View) LIKE Utf8View("%a%") AND CAST(binary_as_string_default.binaryview_col AS Utf8View) LIKE Utf8View("%a%") +02)--TableScan: binary_as_string_default projection=[binary_col, largebinary_col, 
binaryview_col], partial_filters=[CAST(binary_as_string_default.binary_col AS Utf8View) LIKE Utf8View("%a%"), CAST(binary_as_string_default.largebinary_col AS Utf8View) LIKE Utf8View("%a%"), CAST(binary_as_string_default.binaryview_col AS Utf8View) LIKE Utf8View("%a%")] physical_plan 01)CoalesceBatchesExec: target_batch_size=8192 -02)--FilterExec: CAST(binary_col@0 AS Utf8) LIKE %a% AND CAST(largebinary_col@1 AS Utf8) LIKE %a% AND CAST(binaryview_col@2 AS Utf8) LIKE %a% +02)--FilterExec: CAST(binary_col@0 AS Utf8View) LIKE %a% AND CAST(largebinary_col@1 AS Utf8View) LIKE %a% AND CAST(binaryview_col@2 AS Utf8View) LIKE %a% 03)----RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1 -04)------ParquetExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet/binary_as_string.parquet]]}, projection=[binary_col, largebinary_col, binaryview_col], predicate=CAST(binary_col@0 AS Utf8) LIKE %a% AND CAST(largebinary_col@1 AS Utf8) LIKE %a% AND CAST(binaryview_col@2 AS Utf8) LIKE %a% +04)------ParquetExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet/binary_as_string.parquet]]}, projection=[binary_col, largebinary_col, binaryview_col], predicate=CAST(binary_col@0 AS Utf8View) LIKE %a% AND CAST(largebinary_col@1 AS Utf8View) LIKE %a% AND CAST(binaryview_col@2 AS Utf8View) LIKE %a% statement ok @@ -432,15 +432,15 @@ select arrow_typeof(binaryview_col), binaryview_col FROM binary_as_string_option; ---- -Utf8 aaa Utf8 aaa Utf8 aaa -Utf8 bbb Utf8 bbb Utf8 bbb -Utf8 ccc Utf8 ccc Utf8 ccc -Utf8 ddd Utf8 ddd Utf8 ddd -Utf8 eee Utf8 eee Utf8 eee -Utf8 fff Utf8 fff Utf8 fff -Utf8 ggg Utf8 ggg Utf8 ggg -Utf8 hhh Utf8 hhh Utf8 hhh -Utf8 iii Utf8 iii Utf8 iii +Utf8View aaa Utf8View aaa Utf8View aaa +Utf8View bbb Utf8View bbb Utf8View bbb +Utf8View ccc Utf8View ccc Utf8View ccc +Utf8View ddd Utf8View ddd Utf8View ddd +Utf8View eee Utf8View eee Utf8View eee +Utf8View fff Utf8View fff Utf8View fff +Utf8View ggg Utf8View ggg Utf8View ggg +Utf8View hhh Utf8View hhh Utf8View hhh +Utf8View iii Utf8View iii Utf8View iii # Run an explain plan to show the cast happens in the plan (there should be no casts) query TT @@ -453,8 +453,8 @@ EXPLAIN binaryview_col LIKE '%a%'; ---- logical_plan -01)Filter: binary_as_string_option.binary_col LIKE Utf8("%a%") AND binary_as_string_option.largebinary_col LIKE Utf8("%a%") AND binary_as_string_option.binaryview_col LIKE Utf8("%a%") -02)--TableScan: binary_as_string_option projection=[binary_col, largebinary_col, binaryview_col], partial_filters=[binary_as_string_option.binary_col LIKE Utf8("%a%"), binary_as_string_option.largebinary_col LIKE Utf8("%a%"), binary_as_string_option.binaryview_col LIKE Utf8("%a%")] +01)Filter: binary_as_string_option.binary_col LIKE Utf8View("%a%") AND binary_as_string_option.largebinary_col LIKE Utf8View("%a%") AND binary_as_string_option.binaryview_col LIKE Utf8View("%a%") +02)--TableScan: binary_as_string_option projection=[binary_col, largebinary_col, binaryview_col], partial_filters=[binary_as_string_option.binary_col LIKE Utf8View("%a%"), binary_as_string_option.largebinary_col LIKE Utf8View("%a%"), binary_as_string_option.binaryview_col LIKE Utf8View("%a%")] physical_plan 01)CoalesceBatchesExec: target_batch_size=8192 02)--FilterExec: binary_col@0 LIKE %a% AND largebinary_col@1 LIKE %a% AND binaryview_col@2 LIKE %a% diff --git a/docs/source/user-guide/configs.md b/docs/source/user-guide/configs.md index e920189713fc..015d35cf2455 100644 --- 
a/docs/source/user-guide/configs.md +++ b/docs/source/user-guide/configs.md @@ -56,7 +56,7 @@ Environment variables are read during `SessionConfig` initialisation so they mus | datafusion.execution.parquet.metadata_size_hint | NULL | (reading) If specified, the parquet reader will try and fetch the last `size_hint` bytes of the parquet file optimistically. If not specified, two reads are required: One read to fetch the 8-byte parquet footer and another to fetch the metadata length encoded in the footer | | datafusion.execution.parquet.pushdown_filters | false | (reading) If true, filter expressions are be applied during the parquet decoding operation to reduce the number of rows decoded. This optimization is sometimes called "late materialization". | | datafusion.execution.parquet.reorder_filters | false | (reading) If true, filter expressions evaluated during the parquet decoding operation will be reordered heuristically to minimize the cost of evaluation. If false, the filters are applied in the same order as written in the query | -| datafusion.execution.parquet.schema_force_view_types | false | (reading) If true, parquet reader will read columns of `Utf8/Utf8Large` with `Utf8View`, and `Binary/BinaryLarge` with `BinaryView`. | +| datafusion.execution.parquet.schema_force_view_types | true | (reading) If true, parquet reader will read columns of `Utf8/Utf8Large` with `Utf8View`, and `Binary/BinaryLarge` with `BinaryView`. | | datafusion.execution.parquet.binary_as_string | false | (reading) If true, parquet reader will read columns of `Binary/LargeBinary` with `Utf8`, and `BinaryView` with `Utf8View`. Parquet files generated by some legacy writers do not correctly set the UTF8 flag for strings, causing string columns to be loaded as BLOB instead. | | datafusion.execution.parquet.data_pagesize_limit | 1048576 | (writing) Sets best effort maximum size of data page in bytes | | datafusion.execution.parquet.write_batch_size | 1024 | (writing) Sets write_batch_size in bytes | From 7d34ccc05778870a1554f234742ef362a9355077 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Wed, 30 Oct 2024 14:21:34 -0400 Subject: [PATCH 07/32] Increase fuzz testing of streaming group by / low cardinality columns (#12990) --- .../core/tests/fuzz_cases/aggregate_fuzz.rs | 24 ++--- .../aggregation_fuzzer/data_generator.rs | 96 +++++++++++++------ .../fuzz_cases/aggregation_fuzzer/fuzzer.rs | 49 +++++++--- 3 files changed, 113 insertions(+), 56 deletions(-) diff --git a/datafusion/core/tests/fuzz_cases/aggregate_fuzz.rs b/datafusion/core/tests/fuzz_cases/aggregate_fuzz.rs index 28901b14b5b7..d715635c5951 100644 --- a/datafusion/core/tests/fuzz_cases/aggregate_fuzz.rs +++ b/datafusion/core/tests/fuzz_cases/aggregate_fuzz.rs @@ -65,10 +65,6 @@ use crate::fuzz_cases::aggregation_fuzzer::{ // // TODO: test other aggregate functions // - AVG (unstable given the wide range of inputs) -// -// TODO: specific test for ordering (ensure all group by columns are ordered) -// Currently the data is sorted by random columns, so there are almost no -// repeated runs. 
To improve coverage we should also sort by lower cardinality columns #[tokio::test(flavor = "multi_thread")] async fn test_min() { let data_gen_config = baseline_config(); @@ -79,7 +75,7 @@ async fn test_min() { .with_aggregate_function("min") // min works on all column types .with_aggregate_arguments(data_gen_config.all_columns()) - .with_group_by_columns(data_gen_config.all_columns()); + .set_group_by_columns(data_gen_config.all_columns()); AggregationFuzzerBuilder::from(data_gen_config) .add_query_builder(query_builder) @@ -98,7 +94,7 @@ async fn test_max() { .with_aggregate_function("max") // max works on all column types .with_aggregate_arguments(data_gen_config.all_columns()) - .with_group_by_columns(data_gen_config.all_columns()); + .set_group_by_columns(data_gen_config.all_columns()); AggregationFuzzerBuilder::from(data_gen_config) .add_query_builder(query_builder) @@ -118,7 +114,7 @@ async fn test_sum() { .with_distinct_aggregate_function("sum") // sum only works on numeric columns .with_aggregate_arguments(data_gen_config.numeric_columns()) - .with_group_by_columns(data_gen_config.all_columns()); + .set_group_by_columns(data_gen_config.all_columns()); AggregationFuzzerBuilder::from(data_gen_config) .add_query_builder(query_builder) @@ -138,7 +134,7 @@ async fn test_count() { .with_distinct_aggregate_function("count") // count work for all arguments .with_aggregate_arguments(data_gen_config.all_columns()) - .with_group_by_columns(data_gen_config.all_columns()); + .set_group_by_columns(data_gen_config.all_columns()); AggregationFuzzerBuilder::from(data_gen_config) .add_query_builder(query_builder) @@ -174,15 +170,21 @@ fn baseline_config() -> DatasetGeneratorConfig { // TODO add support for utf8view in data generator // ColumnDescr::new("utf8view", DataType::Utf8View), // todo binary + // low cardinality columns + ColumnDescr::new("u8_low", DataType::UInt8).with_max_num_distinct(10), + ColumnDescr::new("utf8_low", DataType::Utf8).with_max_num_distinct(10), ]; + let min_num_rows = 512; + let max_num_rows = 1024; + DatasetGeneratorConfig { columns, - rows_num_range: (512, 1024), + rows_num_range: (min_num_rows, max_num_rows), sort_keys_set: vec![ // low cardinality to try and get many repeated runs - vec![String::from("u8")], - vec![String::from("utf8"), String::from("u8")], + vec![String::from("u8_low")], + vec![String::from("utf8_low"), String::from("u8_low")], ], } } diff --git a/datafusion/core/tests/fuzz_cases/aggregation_fuzzer/data_generator.rs b/datafusion/core/tests/fuzz_cases/aggregation_fuzzer/data_generator.rs index ef9b5a7f355a..4fa1b7aa263d 100644 --- a/datafusion/core/tests/fuzz_cases/aggregation_fuzzer/data_generator.rs +++ b/datafusion/core/tests/fuzz_cases/aggregation_fuzzer/data_generator.rs @@ -174,11 +174,16 @@ impl Dataset { #[derive(Debug, Clone)] pub struct ColumnDescr { - // Column name + /// Column name name: String, - // Data type of this column + /// Data type of this column column_type: DataType, + + /// The maximum number of distinct values in this column. + /// + /// See [`ColumnDescr::with_max_num_distinct`] for more information + max_num_distinct: Option, } impl ColumnDescr { @@ -187,8 +192,18 @@ impl ColumnDescr { Self { name: name.to_string(), column_type, + max_num_distinct: None, } } + + /// set the maximum number of distinct values in this column + /// + /// If `None`, the number of distinct values is randomly selected between 1 + /// and the number of rows. 
+ pub fn with_max_num_distinct(mut self, num_distinct: usize) -> Self { + self.max_num_distinct = Some(num_distinct); + self + } } /// Record batch generator @@ -203,20 +218,15 @@ struct RecordBatchGenerator { } macro_rules! generate_string_array { - ($SELF:ident, $NUM_ROWS:ident, $BATCH_GEN_RNG:ident, $ARRAY_GEN_RNG:ident, $OFFSET_TYPE:ty) => {{ + ($SELF:ident, $NUM_ROWS:ident, $MAX_NUM_DISTINCT:expr, $BATCH_GEN_RNG:ident, $ARRAY_GEN_RNG:ident, $OFFSET_TYPE:ty) => {{ let null_pct_idx = $BATCH_GEN_RNG.gen_range(0..$SELF.candidate_null_pcts.len()); let null_pct = $SELF.candidate_null_pcts[null_pct_idx]; let max_len = $BATCH_GEN_RNG.gen_range(1..50); - let num_distinct_strings = if $NUM_ROWS > 1 { - $BATCH_GEN_RNG.gen_range(1..$NUM_ROWS) - } else { - $NUM_ROWS - }; let mut generator = StringArrayGenerator { max_len, num_strings: $NUM_ROWS, - num_distinct_strings, + num_distinct_strings: $MAX_NUM_DISTINCT, null_pct, rng: $ARRAY_GEN_RNG, }; @@ -226,19 +236,14 @@ macro_rules! generate_string_array { } macro_rules! generate_primitive_array { - ($SELF:ident, $NUM_ROWS:ident, $BATCH_GEN_RNG:ident, $ARRAY_GEN_RNG:ident, $ARROW_TYPE:ident) => { + ($SELF:ident, $NUM_ROWS:ident, $MAX_NUM_DISTINCT:expr, $BATCH_GEN_RNG:ident, $ARRAY_GEN_RNG:ident, $ARROW_TYPE:ident) => { paste::paste! {{ let null_pct_idx = $BATCH_GEN_RNG.gen_range(0..$SELF.candidate_null_pcts.len()); let null_pct = $SELF.candidate_null_pcts[null_pct_idx]; - let num_distinct_primitives = if $NUM_ROWS > 1 { - $BATCH_GEN_RNG.gen_range(1..$NUM_ROWS) - } else { - $NUM_ROWS - }; let mut generator = PrimitiveArrayGenerator { num_primitives: $NUM_ROWS, - num_distinct_primitives, + num_distinct_primitives: $MAX_NUM_DISTINCT, null_pct, rng: $ARRAY_GEN_RNG, }; @@ -268,7 +273,7 @@ impl RecordBatchGenerator { let mut arrays = Vec::with_capacity(self.columns.len()); for col in self.columns.iter() { let array = self.generate_array_of_type( - col.column_type.clone(), + col, num_rows, &mut rng, array_gen_rng.clone(), @@ -289,16 +294,28 @@ impl RecordBatchGenerator { fn generate_array_of_type( &self, - data_type: DataType, + col: &ColumnDescr, num_rows: usize, batch_gen_rng: &mut ThreadRng, array_gen_rng: StdRng, ) -> ArrayRef { - match data_type { + let num_distinct = if num_rows > 1 { + batch_gen_rng.gen_range(1..num_rows) + } else { + num_rows + }; + // cap to at most the num_distinct values + let max_num_distinct = col + .max_num_distinct + .map(|max| num_distinct.min(max)) + .unwrap_or(num_distinct); + + match col.column_type { DataType::Int8 => { generate_primitive_array!( self, num_rows, + max_num_distinct, batch_gen_rng, array_gen_rng, Int8Type @@ -308,6 +325,7 @@ impl RecordBatchGenerator { generate_primitive_array!( self, num_rows, + max_num_distinct, batch_gen_rng, array_gen_rng, Int16Type @@ -317,6 +335,7 @@ impl RecordBatchGenerator { generate_primitive_array!( self, num_rows, + max_num_distinct, batch_gen_rng, array_gen_rng, Int32Type @@ -326,6 +345,7 @@ impl RecordBatchGenerator { generate_primitive_array!( self, num_rows, + max_num_distinct, batch_gen_rng, array_gen_rng, Int64Type @@ -335,6 +355,7 @@ impl RecordBatchGenerator { generate_primitive_array!( self, num_rows, + max_num_distinct, batch_gen_rng, array_gen_rng, UInt8Type @@ -344,6 +365,7 @@ impl RecordBatchGenerator { generate_primitive_array!( self, num_rows, + max_num_distinct, batch_gen_rng, array_gen_rng, UInt16Type @@ -353,6 +375,7 @@ impl RecordBatchGenerator { generate_primitive_array!( self, num_rows, + max_num_distinct, batch_gen_rng, array_gen_rng, UInt32Type @@ -362,6 
+385,7 @@ impl RecordBatchGenerator { generate_primitive_array!( self, num_rows, + max_num_distinct, batch_gen_rng, array_gen_rng, UInt64Type @@ -371,6 +395,7 @@ impl RecordBatchGenerator { generate_primitive_array!( self, num_rows, + max_num_distinct, batch_gen_rng, array_gen_rng, Float32Type @@ -380,6 +405,7 @@ impl RecordBatchGenerator { generate_primitive_array!( self, num_rows, + max_num_distinct, batch_gen_rng, array_gen_rng, Float64Type @@ -389,6 +415,7 @@ impl RecordBatchGenerator { generate_primitive_array!( self, num_rows, + max_num_distinct, batch_gen_rng, array_gen_rng, Date32Type @@ -398,19 +425,34 @@ impl RecordBatchGenerator { generate_primitive_array!( self, num_rows, + max_num_distinct, batch_gen_rng, array_gen_rng, Date64Type ) } DataType::Utf8 => { - generate_string_array!(self, num_rows, batch_gen_rng, array_gen_rng, i32) + generate_string_array!( + self, + num_rows, + max_num_distinct, + batch_gen_rng, + array_gen_rng, + i32 + ) } DataType::LargeUtf8 => { - generate_string_array!(self, num_rows, batch_gen_rng, array_gen_rng, i64) + generate_string_array!( + self, + num_rows, + max_num_distinct, + batch_gen_rng, + array_gen_rng, + i64 + ) } _ => { - panic!("Unsupported data generator type: {data_type}") + panic!("Unsupported data generator type: {}", col.column_type) } } } @@ -435,14 +477,8 @@ mod test { // - Their rows num should be same and between [16, 32] let config = DatasetGeneratorConfig { columns: vec![ - ColumnDescr { - name: "a".to_string(), - column_type: DataType::Utf8, - }, - ColumnDescr { - name: "b".to_string(), - column_type: DataType::UInt32, - }, + ColumnDescr::new("a", DataType::Utf8), + ColumnDescr::new("b", DataType::UInt32), ], rows_num_range: (16, 32), sort_keys_set: vec![vec!["b".to_string()]], diff --git a/datafusion/core/tests/fuzz_cases/aggregation_fuzzer/fuzzer.rs b/datafusion/core/tests/fuzz_cases/aggregation_fuzzer/fuzzer.rs index 0704bafa0318..d021e73f35b2 100644 --- a/datafusion/core/tests/fuzz_cases/aggregation_fuzzer/fuzzer.rs +++ b/datafusion/core/tests/fuzz_cases/aggregation_fuzzer/fuzzer.rs @@ -63,17 +63,35 @@ impl AggregationFuzzerBuilder { } /// Adds random SQL queries to the fuzzer along with the table name - pub fn add_query_builder(mut self, query_builder: QueryBuilder) -> Self { - const NUM_QUERIES: usize = 10; + /// + /// Adds + /// - 3 random queries + /// - 3 random queries for each group by selected from the sort keys + /// - 1 random query with no grouping + pub fn add_query_builder(mut self, mut query_builder: QueryBuilder) -> Self { + const NUM_QUERIES: usize = 3; for _ in 0..NUM_QUERIES { - self = self.add_sql(&query_builder.generate_query()); + let sql = query_builder.generate_query(); + self.candidate_sqls.push(Arc::from(sql)); } - self.table_name(query_builder.table_name()) - } - - fn add_sql(mut self, sql: &str) -> Self { + // also add several queries limited to grouping on the group by columns only, if any + // So if the data is sorted on `a,b` only group by `a,b` or`a` or `b` + if let Some(data_gen_config) = &self.data_gen_config { + for sort_keys in &data_gen_config.sort_keys_set { + let group_by_columns = sort_keys.iter().map(|s| s.as_str()); + query_builder = query_builder.set_group_by_columns(group_by_columns); + for _ in 0..NUM_QUERIES { + let sql = query_builder.generate_query(); + self.candidate_sqls.push(Arc::from(sql)); + } + } + } + // also add a query with no grouping + query_builder = query_builder.set_group_by_columns(vec![]); + let sql = query_builder.generate_query(); 
self.candidate_sqls.push(Arc::from(sql)); - self + + self.table_name(query_builder.table_name()) } pub fn table_name(mut self, table_name: &str) -> Self { @@ -359,7 +377,7 @@ fn format_batches_with_limit(batches: &[RecordBatch]) -> impl std::fmt::Display /// ```sql /// SELECT AGG(..) FROM table_name GROUP BY ///``` -#[derive(Debug, Default)] +#[derive(Debug, Default, Clone)] pub struct QueryBuilder { /// The name of the table to query table_name: String, @@ -412,17 +430,16 @@ impl QueryBuilder { self } - /// Add a column to be used in the group bys - pub fn with_group_by_columns<'a>( + /// Set the columns to be used in the group bys clauses + pub fn set_group_by_columns<'a>( mut self, group_by: impl IntoIterator, ) -> Self { - let group_by = group_by.into_iter().map(String::from); - self.group_by_columns.extend(group_by); + self.group_by_columns = group_by.into_iter().map(String::from).collect(); self } - /// Add a column to be used as an argument in the aggregate functions + /// Add one or more columns to be used as an argument in the aggregate functions pub fn with_aggregate_arguments<'a>( mut self, arguments: impl IntoIterator, @@ -497,7 +514,9 @@ impl QueryBuilder { let mut already_used = HashSet::new(); let mut group_by = vec![]; - while group_by.len() < num_group_by { + while group_by.len() < num_group_by + && already_used.len() != self.group_by_columns.len() + { let idx = rng.gen_range(0..self.group_by_columns.len()); if already_used.insert(idx) { group_by.push(self.group_by_columns[idx].clone()); From 68bf7ad081fd551b5d6797b7655b93bb7048292f Mon Sep 17 00:00:00 2001 From: Emil Ejbyfeldt Date: Wed, 30 Oct 2024 21:04:41 +0100 Subject: [PATCH 08/32] fix: Order by mentioning missing column multiple times (#13158) * fix: Order by mentioning missing column multiple times Previously we crashed on queries where the order by mentioned a missing columns multiple times. 
Closes #13157 * Use HashSet to avoid quadratic algorithm * Use IndexSet to maintain old order and be deterministic --- datafusion/expr/src/logical_plan/builder.rs | 18 ++++++++++-------- datafusion/sqllogictest/test_files/order.slt | 8 ++++++++ 2 files changed, 18 insertions(+), 8 deletions(-) diff --git a/datafusion/expr/src/logical_plan/builder.rs b/datafusion/expr/src/logical_plan/builder.rs index 2547aa23d3cd..e50ffb59d24a 100644 --- a/datafusion/expr/src/logical_plan/builder.rs +++ b/datafusion/expr/src/logical_plan/builder.rs @@ -57,6 +57,7 @@ use datafusion_common::{ UnnestOptions, }; use datafusion_expr_common::type_coercion::binary::type_union_resolution; +use indexmap::IndexSet; /// Default table name for unnamed table pub const UNNAMED_TABLE: &str = "?table?"; @@ -567,7 +568,7 @@ impl LogicalPlanBuilder { /// See for more details fn add_missing_columns( curr_plan: LogicalPlan, - missing_cols: &[Column], + missing_cols: &IndexSet, is_distinct: bool, ) -> Result { match curr_plan { @@ -612,7 +613,7 @@ impl LogicalPlanBuilder { fn ambiguous_distinct_check( missing_exprs: &[Expr], - missing_cols: &[Column], + missing_cols: &IndexSet, projection_exprs: &[Expr], ) -> Result<()> { if missing_exprs.is_empty() { @@ -677,15 +678,16 @@ impl LogicalPlanBuilder { let schema = self.plan.schema(); // Collect sort columns that are missing in the input plan's schema - let mut missing_cols: Vec = vec![]; + let mut missing_cols: IndexSet = IndexSet::new(); sorts.iter().try_for_each::<_, Result<()>>(|sort| { let columns = sort.expr.column_refs(); - columns.into_iter().for_each(|c| { - if !schema.has_column(c) { - missing_cols.push(c.clone()); - } - }); + missing_cols.extend( + columns + .into_iter() + .filter(|c| !schema.has_column(c)) + .cloned(), + ); Ok(()) })?; diff --git a/datafusion/sqllogictest/test_files/order.slt b/datafusion/sqllogictest/test_files/order.slt index 6cc7ee0403f2..dd73447f8b25 100644 --- a/datafusion/sqllogictest/test_files/order.slt +++ b/datafusion/sqllogictest/test_files/order.slt @@ -335,6 +335,14 @@ select column1 from foo order by log(column2); 3 5 +# Test issue: https://github.com/apache/datafusion/issues/13157 +query I +select column1 from foo order by column2 % 2, column2; +---- +1 +3 +5 + # Cleanup statement ok drop table foo; From 8c6bb39c54aefaff79aa657009ea22680464ba5d Mon Sep 17 00:00:00 2001 From: smarticen <49951542+smarticen@users.noreply.github.com> Date: Thu, 31 Oct 2024 04:05:15 +0800 Subject: [PATCH 09/32] fix: import JoinTestType without triggering unused_qualifications lint (#13170) * fix: import JoinTestType without triggering unused_qualifications lint * fix: remove use statement instead of removing qualifications --- datafusion/core/tests/fuzz_cases/join_fuzz.rs | 16 +--------------- 1 file changed, 1 insertion(+), 15 deletions(-) diff --git a/datafusion/core/tests/fuzz_cases/join_fuzz.rs b/datafusion/core/tests/fuzz_cases/join_fuzz.rs index c8478db22bd4..5c03bc3a9110 100644 --- a/datafusion/core/tests/fuzz_cases/join_fuzz.rs +++ b/datafusion/core/tests/fuzz_cases/join_fuzz.rs @@ -41,7 +41,6 @@ use datafusion::physical_plan::joins::{ }; use datafusion::physical_plan::memory::MemoryExec; -use crate::fuzz_cases::join_fuzz::JoinTestType::NljHj; use datafusion::prelude::{SessionConfig, SessionContext}; use test_utils::stagger_batch_with_seed; @@ -90,7 +89,6 @@ fn col_lt_col_filter(schema1: Arc, schema2: Arc) -> JoinFilter { } #[tokio::test] -#[allow(unused_qualifications)] async fn test_inner_join_1k_filtered() { JoinFuzzTestCase::new( 
make_staggered_batches(1000), @@ -103,7 +101,6 @@ async fn test_inner_join_1k_filtered() { } #[tokio::test] -#[allow(unused_qualifications)] async fn test_inner_join_1k() { JoinFuzzTestCase::new( make_staggered_batches(1000), @@ -116,7 +113,6 @@ async fn test_inner_join_1k() { } #[tokio::test] -#[allow(unused_qualifications)] async fn test_left_join_1k() { JoinFuzzTestCase::new( make_staggered_batches(1000), @@ -129,7 +125,6 @@ async fn test_left_join_1k() { } #[tokio::test] -#[allow(unused_qualifications)] async fn test_left_join_1k_filtered() { JoinFuzzTestCase::new( make_staggered_batches(1000), @@ -142,7 +137,6 @@ async fn test_left_join_1k_filtered() { } #[tokio::test] -#[allow(unused_qualifications)] async fn test_right_join_1k() { JoinFuzzTestCase::new( make_staggered_batches(1000), @@ -155,7 +149,6 @@ async fn test_right_join_1k() { } #[tokio::test] -#[allow(unused_qualifications)] async fn test_right_join_1k_filtered() { JoinFuzzTestCase::new( make_staggered_batches(1000), @@ -168,7 +161,6 @@ async fn test_right_join_1k_filtered() { } #[tokio::test] -#[allow(unused_qualifications)] async fn test_full_join_1k() { JoinFuzzTestCase::new( make_staggered_batches(1000), @@ -181,7 +173,6 @@ async fn test_full_join_1k() { } #[tokio::test] -#[allow(unused_qualifications)] // flaky for HjSmj case // https://github.com/apache/datafusion/issues/12359 async fn test_full_join_1k_filtered() { @@ -196,7 +187,6 @@ async fn test_full_join_1k_filtered() { } #[tokio::test] -#[allow(unused_qualifications)] async fn test_semi_join_1k() { JoinFuzzTestCase::new( make_staggered_batches(1000), @@ -209,7 +199,6 @@ async fn test_semi_join_1k() { } #[tokio::test] -#[allow(unused_qualifications)] async fn test_semi_join_1k_filtered() { JoinFuzzTestCase::new( make_staggered_batches(1000), @@ -222,7 +211,6 @@ async fn test_semi_join_1k_filtered() { } #[tokio::test] -#[allow(unused_qualifications)] async fn test_anti_join_1k() { JoinFuzzTestCase::new( make_staggered_batches(1000), @@ -235,7 +223,6 @@ async fn test_anti_join_1k() { } #[tokio::test] -#[allow(unused_qualifications)] async fn test_anti_join_1k_filtered() { JoinFuzzTestCase::new( make_staggered_batches(1000), @@ -243,7 +230,7 @@ async fn test_anti_join_1k_filtered() { JoinType::LeftAnti, Some(Box::new(col_lt_col_filter)), ) - .run_test(&[JoinTestType::HjSmj, NljHj], false) + .run_test(&[JoinTestType::HjSmj, JoinTestType::NljHj], false) .await } @@ -461,7 +448,6 @@ impl JoinFuzzTestCase { /// `join_tests` - identifies what join types to test /// if `debug` flag is set the test will save randomly generated inputs and outputs to user folders, /// so it is easy to debug a test on top of the failed data - #[allow(unused_qualifications)] async fn run_test(&self, join_tests: &[JoinTestType], debug: bool) { for batch_size in self.batch_sizes { let session_config = SessionConfig::new().with_batch_size(*batch_size); From 538e0d158b231339cf0757e56d938ad3067abfc0 Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Wed, 30 Oct 2024 16:22:51 -0400 Subject: [PATCH 10/32] FFI initial implementation (#12920) * Initial commit of FFI table provider code * Add table type * Make struct pub * Implementing supports_filters_pushdown * Move plan properties over to its own file * Adding release function * Adding release functions to additional structs * Resolve memory leaks * Rename ForeignExecutionPlan for consistency * Resolving memory leak issues * Remove debug statements. 
Create runtime for block_on operations * Switching over to stable abi and async-ffi * Make consistent the use of Foreign and FFI on struct names * Apply prettier * Format for linter * Add doc-comment * Add option to specify table provider does not support pushdown filters to avoid extra work for some providers * Remove setting default features in cargo file * Tokio only needed for unit tests * Provide log errors rather than failing silently on schema requests * Set default features for datafusion to false in ffi crate * Using TryFrom or From instead of implementing new when there is only one parameter * Move arrow wrappers into their own file * Add documentation * Small adjustment to documentation * Add license text * Fix unnecessary qualification * taplo format --- Cargo.toml | 2 + datafusion/ffi/Cargo.toml | 51 +++ datafusion/ffi/README.md | 81 ++++ datafusion/ffi/src/arrow_wrappers.rs | 70 ++++ datafusion/ffi/src/execution_plan.rs | 361 ++++++++++++++++++ datafusion/ffi/src/lib.rs | 29 ++ datafusion/ffi/src/plan_properties.rs | 297 +++++++++++++++ datafusion/ffi/src/record_batch_stream.rs | 176 +++++++++ datafusion/ffi/src/session_config.rs | 187 +++++++++ datafusion/ffi/src/table_provider.rs | 443 ++++++++++++++++++++++ datafusion/ffi/src/table_source.rs | 87 +++++ 11 files changed, 1784 insertions(+) create mode 100644 datafusion/ffi/Cargo.toml create mode 100644 datafusion/ffi/README.md create mode 100644 datafusion/ffi/src/arrow_wrappers.rs create mode 100644 datafusion/ffi/src/execution_plan.rs create mode 100644 datafusion/ffi/src/lib.rs create mode 100644 datafusion/ffi/src/plan_properties.rs create mode 100644 datafusion/ffi/src/record_batch_stream.rs create mode 100644 datafusion/ffi/src/session_config.rs create mode 100644 datafusion/ffi/src/table_provider.rs create mode 100644 datafusion/ffi/src/table_source.rs diff --git a/Cargo.toml b/Cargo.toml index 0a7184ad2d99..2f8896f7d90c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -26,6 +26,7 @@ members = [ "datafusion/expr", "datafusion/expr-common", "datafusion/execution", + "datafusion/ffi", "datafusion/functions", "datafusion/functions-aggregate", "datafusion/functions-aggregate-common", @@ -99,6 +100,7 @@ datafusion-common-runtime = { path = "datafusion/common-runtime", version = "42. datafusion-execution = { path = "datafusion/execution", version = "42.1.0" } datafusion-expr = { path = "datafusion/expr", version = "42.1.0" } datafusion-expr-common = { path = "datafusion/expr-common", version = "42.1.0" } +datafusion-ffi = { path = "datafusion/ffi", version = "42.1.0" } datafusion-functions = { path = "datafusion/functions", version = "42.1.0" } datafusion-functions-aggregate = { path = "datafusion/functions-aggregate", version = "42.1.0" } datafusion-functions-aggregate-common = { path = "datafusion/functions-aggregate-common", version = "42.1.0" } diff --git a/datafusion/ffi/Cargo.toml b/datafusion/ffi/Cargo.toml new file mode 100644 index 000000000000..119747342515 --- /dev/null +++ b/datafusion/ffi/Cargo.toml @@ -0,0 +1,51 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +[package] +name = "datafusion-ffi" +description = "Foreign Function Interface implementation for DataFusion" +readme = "README.md" +version = { workspace = true } +edition = { workspace = true } +homepage = { workspace = true } +repository = { workspace = true } +license = { workspace = true } +authors = { workspace = true } +# Specify MSRV here as `cargo msrv` doesn't support workspace version +rust-version = "1.76" + +[lints] +workspace = true + +[lib] +name = "datafusion_ffi" +path = "src/lib.rs" + +[dependencies] +abi_stable = "0.11.3" +arrow = { workspace = true, features = ["ffi"] } +async-ffi = { version = "0.5.0", features = ["abi_stable"] } +async-trait = { workspace = true } +datafusion = { workspace = true, default-features = false } +datafusion-proto = { workspace = true } +doc-comment = { workspace = true } +futures = { workspace = true } +log = { workspace = true } +prost = { workspace = true } + +[dev-dependencies] +tokio = { workspace = true } diff --git a/datafusion/ffi/README.md b/datafusion/ffi/README.md new file mode 100644 index 000000000000..ba4bb8b961a1 --- /dev/null +++ b/datafusion/ffi/README.md @@ -0,0 +1,81 @@ + + +# `datafusion-ffi`: Apache DataFusion Foreign Function Interface + +This crate contains code to allow interoperability of Apache [DataFusion] +with functions from other languages using a stable interface. + +See [API Docs] for details and examples. + +We expect this crate may be used by both sides of the FFI. This allows users +to create modules that can interoperate without the necessity of using the same +version of DataFusion. The driving use case has been the `datafusion-python` +repository, but many other use cases may exist. We envision at least two +use cases. + +1. `datafusion-python` which will use the FFI to provide external services such + as a `TableProvider` without needing to re-export the entire `datafusion-python` + code base. With `datafusion-ffi` these packages do not need `datafusion-python` + as a dependency at all. +2. Users may want to create a modular interface that allows runtime loading of + libraries. + +## Struct Layout + +In this crate we have a variety of structs which closely mimic the behavior of +their internal counterparts. In the following example, we will refer to the +`TableProvider`, but the same pattern exists for other structs. + +Each of the exposed structs in this crate is provided with a variant prefixed +with `Foreign`. This variant is designed to be used by the consumer of the +foreign code. The `Foreign` structs should _never_ access the `private_data` +fields. Instead they should only access the data returned through the function +calls defined on the `FFI_` structs. The second purpose of the `Foreign` +structs is to contain additional data that may be needed by the traits that +are implemented on them. Some of these traits require borrowing data which +can be far more convenient to store locally. + +For example, we have a struct `FFI_TableProvider` to give access to the +`TableProvider` functions like `table_type()` and `scan()`.
If we write a +library that wishes to expose its `TableProvider`, then we can access the +private data that contains the Arc reference to the `TableProvider` via +`FFI_TableProvider`. This data is local to the library. + +If we have a program that accesses a `TableProvider` via FFI, then it +will use `ForeignTableProvider`. When using `ForeignTableProvider` we **must** +not attempt to access the `private_data` field in `FFI_TableProvider`. If a +user is testing locally, they may be able to successfully access this field, but +it will only work when building against the exact same version of +`DataFusion` for both libraries **and** the same compiler. It will not work +in general. + +It is worth noting that which library is the `local` and which is `foreign` +depends on which interface we are considering. For example, suppose we have a +Python library called `my_provider` that exposes a `TableProvider` called +`MyProvider` via `FFI_TableProvider`. Within the library `my_provider` we can +access the `private_data` via `FFI_TableProvider`. We connect this to +`datafusion-python`, where we access it as a `ForeignTableProvider`. Now when +we call `scan()` on this interface, we have to pass it a `FFI_SessionConfig`. +The `SessionConfig` is local to `datafusion-python` and **not** `my_provider`. +It is important to be careful when expanding these functions to be certain which +side of the interface each object refers to. + +[datafusion]: https://datafusion.apache.org +[api docs]: http://docs.rs/datafusion-ffi/latest diff --git a/datafusion/ffi/src/arrow_wrappers.rs b/datafusion/ffi/src/arrow_wrappers.rs new file mode 100644 index 000000000000..c5add8782c51 --- /dev/null +++ b/datafusion/ffi/src/arrow_wrappers.rs @@ -0,0 +1,70 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::sync::Arc; + +use abi_stable::StableAbi; +use arrow::{ + datatypes::{Schema, SchemaRef}, + ffi::{FFI_ArrowArray, FFI_ArrowSchema}, +}; +use log::error; + +/// This is a wrapper struct around FFI_ArrowSchema simply to indicate +/// to the StableAbi macros that the underlying struct is FFI safe. +#[repr(C)] +#[derive(Debug, StableAbi)] +pub struct WrappedSchema(#[sabi(unsafe_opaque_field)] pub FFI_ArrowSchema); + +impl From<SchemaRef> for WrappedSchema { + fn from(value: SchemaRef) -> Self { + let ffi_schema = match FFI_ArrowSchema::try_from(value.as_ref()) { + Ok(s) => s, + Err(e) => { + error!("Unable to convert DataFusion Schema to FFI_ArrowSchema in FFI_PlanProperties.
{}", e); + FFI_ArrowSchema::empty() + } + }; + + WrappedSchema(ffi_schema) + } +} + +impl From for SchemaRef { + fn from(value: WrappedSchema) -> Self { + let schema = match Schema::try_from(&value.0) { + Ok(s) => s, + Err(e) => { + error!("Unable to convert from FFI_ArrowSchema to DataFusion Schema in FFI_PlanProperties. {}", e); + Schema::empty() + } + }; + Arc::new(schema) + } +} + +/// This is a wrapper struct for FFI_ArrowArray to indicate to StableAbi +/// that the struct is FFI Safe. For convenience, we also include the +/// schema needed to create a record batch from the array. +#[repr(C)] +#[derive(Debug, StableAbi)] +pub struct WrappedArray { + #[sabi(unsafe_opaque_field)] + pub array: FFI_ArrowArray, + + pub schema: WrappedSchema, +} diff --git a/datafusion/ffi/src/execution_plan.rs b/datafusion/ffi/src/execution_plan.rs new file mode 100644 index 000000000000..d10eda8990b8 --- /dev/null +++ b/datafusion/ffi/src/execution_plan.rs @@ -0,0 +1,361 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::{ffi::c_void, pin::Pin, sync::Arc}; + +use abi_stable::{ + std_types::{RResult, RString, RVec}, + StableAbi, +}; +use datafusion::error::Result; +use datafusion::{ + error::DataFusionError, + execution::{SendableRecordBatchStream, TaskContext}, + physical_plan::{DisplayAs, ExecutionPlan, PlanProperties}, +}; + +use crate::{ + plan_properties::FFI_PlanProperties, record_batch_stream::FFI_RecordBatchStream, +}; + +/// A stable struct for sharing a [`ExecutionPlan`] across FFI boundaries. +#[repr(C)] +#[derive(Debug, StableAbi)] +#[allow(non_camel_case_types)] +pub struct FFI_ExecutionPlan { + /// Return the plan properties + pub properties: unsafe extern "C" fn(plan: &Self) -> FFI_PlanProperties, + + /// Return a vector of children plans + pub children: unsafe extern "C" fn(plan: &Self) -> RVec, + + /// Return the plan name. + pub name: unsafe extern "C" fn(plan: &Self) -> RString, + + /// Execute the plan and return a record batch stream. Errors + /// will be returned as a string. + pub execute: unsafe extern "C" fn( + plan: &Self, + partition: usize, + ) -> RResult, + + /// Used to create a clone on the provider of the execution plan. This should + /// only need to be called by the receiver of the plan. + pub clone: unsafe extern "C" fn(plan: &Self) -> Self, + + /// Release the memory of the private data when it is no longer being used. + pub release: unsafe extern "C" fn(arg: &mut Self), + + /// Internal data. This is only to be accessed by the provider of the plan. + /// A [`ForeignExecutionPlan`] should never attempt to access this data. 
+ pub private_data: *mut c_void, +} + +unsafe impl Send for FFI_ExecutionPlan {} +unsafe impl Sync for FFI_ExecutionPlan {} + +pub struct ExecutionPlanPrivateData { + pub plan: Arc, + pub context: Arc, +} + +unsafe extern "C" fn properties_fn_wrapper( + plan: &FFI_ExecutionPlan, +) -> FFI_PlanProperties { + let private_data = plan.private_data as *const ExecutionPlanPrivateData; + let plan = &(*private_data).plan; + + plan.properties().into() +} + +unsafe extern "C" fn children_fn_wrapper( + plan: &FFI_ExecutionPlan, +) -> RVec { + let private_data = plan.private_data as *const ExecutionPlanPrivateData; + let plan = &(*private_data).plan; + let ctx = &(*private_data).context; + + let children: Vec<_> = plan + .children() + .into_iter() + .map(|child| FFI_ExecutionPlan::new(Arc::clone(child), Arc::clone(ctx))) + .collect(); + + children.into() +} + +unsafe extern "C" fn execute_fn_wrapper( + plan: &FFI_ExecutionPlan, + partition: usize, +) -> RResult { + let private_data = plan.private_data as *const ExecutionPlanPrivateData; + let plan = &(*private_data).plan; + let ctx = &(*private_data).context; + + match plan.execute(partition, Arc::clone(ctx)) { + Ok(rbs) => RResult::ROk(rbs.into()), + Err(e) => RResult::RErr( + format!("Error occurred during FFI_ExecutionPlan execute: {}", e).into(), + ), + } +} +unsafe extern "C" fn name_fn_wrapper(plan: &FFI_ExecutionPlan) -> RString { + let private_data = plan.private_data as *const ExecutionPlanPrivateData; + let plan = &(*private_data).plan; + + plan.name().into() +} + +unsafe extern "C" fn release_fn_wrapper(plan: &mut FFI_ExecutionPlan) { + let private_data = Box::from_raw(plan.private_data as *mut ExecutionPlanPrivateData); + drop(private_data); +} + +unsafe extern "C" fn clone_fn_wrapper(plan: &FFI_ExecutionPlan) -> FFI_ExecutionPlan { + let private_data = plan.private_data as *const ExecutionPlanPrivateData; + let plan_data = &(*private_data); + + FFI_ExecutionPlan::new(Arc::clone(&plan_data.plan), Arc::clone(&plan_data.context)) +} + +impl Clone for FFI_ExecutionPlan { + fn clone(&self) -> Self { + unsafe { (self.clone)(self) } + } +} + +impl FFI_ExecutionPlan { + /// This function is called on the provider's side. + pub fn new(plan: Arc, context: Arc) -> Self { + let private_data = Box::new(ExecutionPlanPrivateData { plan, context }); + + Self { + properties: properties_fn_wrapper, + children: children_fn_wrapper, + name: name_fn_wrapper, + execute: execute_fn_wrapper, + clone: clone_fn_wrapper, + release: release_fn_wrapper, + private_data: Box::into_raw(private_data) as *mut c_void, + } + } +} + +impl Drop for FFI_ExecutionPlan { + fn drop(&mut self) { + unsafe { (self.release)(self) } + } +} + +/// This struct is used to access an execution plan provided by a foreign +/// library across a FFI boundary. +/// +/// The ForeignExecutionPlan is to be used by the caller of the plan, so it has +/// no knowledge or access to the private data. All interaction with the plan +/// must occur through the functions defined in FFI_ExecutionPlan. 
+#[derive(Debug)] +pub struct ForeignExecutionPlan { + name: String, + plan: FFI_ExecutionPlan, + properties: PlanProperties, + children: Vec>, +} + +unsafe impl Send for ForeignExecutionPlan {} +unsafe impl Sync for ForeignExecutionPlan {} + +impl DisplayAs for ForeignExecutionPlan { + fn fmt_as( + &self, + _t: datafusion::physical_plan::DisplayFormatType, + f: &mut std::fmt::Formatter, + ) -> std::fmt::Result { + write!( + f, + "FFI_ExecutionPlan(number_of_children={})", + self.children.len(), + ) + } +} + +impl TryFrom<&FFI_ExecutionPlan> for ForeignExecutionPlan { + type Error = DataFusionError; + + fn try_from(plan: &FFI_ExecutionPlan) -> Result { + unsafe { + let name = (plan.name)(plan).into(); + + let properties: PlanProperties = (plan.properties)(plan).try_into()?; + + let children_rvec = (plan.children)(plan); + let children: Result> = children_rvec + .iter() + .map(ForeignExecutionPlan::try_from) + .map(|child| child.map(|c| Arc::new(c) as Arc)) + .collect(); + + Ok(Self { + name, + plan: plan.clone(), + properties, + children: children?, + }) + } + } +} + +impl ExecutionPlan for ForeignExecutionPlan { + fn name(&self) -> &str { + &self.name + } + + fn as_any(&self) -> &dyn std::any::Any { + self + } + + fn properties(&self) -> &PlanProperties { + &self.properties + } + + fn children(&self) -> Vec<&Arc> { + self.children + .iter() + .map(|p| p as &Arc) + .collect() + } + + fn with_new_children( + self: Arc, + children: Vec>, + ) -> Result> { + Ok(Arc::new(ForeignExecutionPlan { + plan: self.plan.clone(), + name: self.name.clone(), + children, + properties: self.properties.clone(), + })) + } + + fn execute( + &self, + partition: usize, + _context: Arc, + ) -> Result { + unsafe { + match (self.plan.execute)(&self.plan, partition) { + RResult::ROk(stream) => { + let stream = Pin::new(Box::new(stream)) as SendableRecordBatchStream; + Ok(stream) + } + RResult::RErr(e) => Err(DataFusionError::Execution(format!( + "Error occurred during FFI call to FFI_ExecutionPlan execute. 
{}", + e + ))), + } + } + } +} + +#[cfg(test)] +mod tests { + use datafusion::{physical_plan::Partitioning, prelude::SessionContext}; + + use super::*; + + #[derive(Debug)] + pub struct EmptyExec { + props: PlanProperties, + } + + impl EmptyExec { + pub fn new(schema: arrow::datatypes::SchemaRef) -> Self { + Self { + props: PlanProperties::new( + datafusion::physical_expr::EquivalenceProperties::new(schema), + Partitioning::UnknownPartitioning(3), + datafusion::physical_plan::ExecutionMode::Unbounded, + ), + } + } + } + + impl DisplayAs for EmptyExec { + fn fmt_as( + &self, + _t: datafusion::physical_plan::DisplayFormatType, + _f: &mut std::fmt::Formatter, + ) -> std::fmt::Result { + unimplemented!() + } + } + + impl ExecutionPlan for EmptyExec { + fn name(&self) -> &'static str { + "empty-exec" + } + + fn as_any(&self) -> &dyn std::any::Any { + self + } + + fn properties(&self) -> &PlanProperties { + &self.props + } + + fn children(&self) -> Vec<&Arc> { + vec![] + } + + fn with_new_children( + self: Arc, + _: Vec>, + ) -> Result> { + unimplemented!() + } + + fn execute( + &self, + _partition: usize, + _context: Arc, + ) -> Result { + unimplemented!() + } + + fn statistics(&self) -> Result { + unimplemented!() + } + } + + #[test] + fn test_round_trip_ffi_execution_plan() -> Result<()> { + use arrow::datatypes::{DataType, Field, Schema}; + let schema = + Arc::new(Schema::new(vec![Field::new("a", DataType::Float32, false)])); + let ctx = SessionContext::new(); + + let original_plan = Arc::new(EmptyExec::new(schema)); + let original_name = original_plan.name().to_string(); + + let local_plan = FFI_ExecutionPlan::new(original_plan, ctx.task_ctx()); + + let foreign_plan: ForeignExecutionPlan = (&local_plan).try_into()?; + + assert!(original_name == foreign_plan.name()); + + Ok(()) + } +} diff --git a/datafusion/ffi/src/lib.rs b/datafusion/ffi/src/lib.rs new file mode 100644 index 000000000000..4a74e65dc671 --- /dev/null +++ b/datafusion/ffi/src/lib.rs @@ -0,0 +1,29 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. +// Make cheap clones clear: https://github.com/apache/datafusion/issues/11143 +#![deny(clippy::clone_on_ref_ptr)] + +pub mod arrow_wrappers; +pub mod execution_plan; +pub mod plan_properties; +pub mod record_batch_stream; +pub mod session_config; +pub mod table_provider; +pub mod table_source; + +#[cfg(doctest)] +doc_comment::doctest!("../README.md", readme_example_test); diff --git a/datafusion/ffi/src/plan_properties.rs b/datafusion/ffi/src/plan_properties.rs new file mode 100644 index 000000000000..722681ae4a1d --- /dev/null +++ b/datafusion/ffi/src/plan_properties.rs @@ -0,0 +1,297 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. 
See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::{ffi::c_void, sync::Arc}; + +use abi_stable::{ + std_types::{ + RResult::{self, RErr, ROk}, + RStr, RVec, + }, + StableAbi, +}; +use arrow::datatypes::SchemaRef; +use datafusion::{ + error::{DataFusionError, Result}, + physical_expr::EquivalenceProperties, + physical_plan::{ExecutionMode, PlanProperties}, + prelude::SessionContext, +}; +use datafusion_proto::{ + physical_plan::{ + from_proto::{parse_physical_sort_exprs, parse_protobuf_partitioning}, + to_proto::{serialize_partitioning, serialize_physical_sort_exprs}, + DefaultPhysicalExtensionCodec, + }, + protobuf::{Partitioning, PhysicalSortExprNodeCollection}, +}; +use prost::Message; + +use crate::arrow_wrappers::WrappedSchema; + +/// A stable struct for sharing [`PlanProperties`] across FFI boundaries. +#[repr(C)] +#[derive(Debug, StableAbi)] +#[allow(non_camel_case_types)] +pub struct FFI_PlanProperties { + /// The output partitioning is a [`Partitioning`] protobuf message serialized + /// into bytes to pass across the FFI boundary. + pub output_partitioning: + unsafe extern "C" fn(plan: &Self) -> RResult, RStr<'static>>, + + /// Return the execution mode of the plan. + pub execution_mode: unsafe extern "C" fn(plan: &Self) -> FFI_ExecutionMode, + + /// The output ordering is a [`PhysicalSortExprNodeCollection`] protobuf message + /// serialized into bytes to pass across the FFI boundary. + pub output_ordering: + unsafe extern "C" fn(plan: &Self) -> RResult, RStr<'static>>, + + /// Return the schema of the plan. + pub schema: unsafe extern "C" fn(plan: &Self) -> WrappedSchema, + + /// Release the memory of the private data when it is no longer being used. + pub release: unsafe extern "C" fn(arg: &mut Self), + + /// Internal data. This is only to be accessed by the provider of the plan. + /// The foreign library should never attempt to access this data. 
+ pub private_data: *mut c_void, +} + +struct PlanPropertiesPrivateData { + props: PlanProperties, +} + +unsafe extern "C" fn output_partitioning_fn_wrapper( + properties: &FFI_PlanProperties, +) -> RResult, RStr<'static>> { + let private_data = properties.private_data as *const PlanPropertiesPrivateData; + let props = &(*private_data).props; + + let codec = DefaultPhysicalExtensionCodec {}; + let partitioning_data = + match serialize_partitioning(props.output_partitioning(), &codec) { + Ok(p) => p, + Err(_) => { + return RErr( + "unable to serialize output_partitioning in FFI_PlanProperties" + .into(), + ) + } + }; + let output_partitioning = partitioning_data.encode_to_vec(); + + ROk(output_partitioning.into()) +} + +unsafe extern "C" fn execution_mode_fn_wrapper( + properties: &FFI_PlanProperties, +) -> FFI_ExecutionMode { + let private_data = properties.private_data as *const PlanPropertiesPrivateData; + let props = &(*private_data).props; + props.execution_mode().into() +} + +unsafe extern "C" fn output_ordering_fn_wrapper( + properties: &FFI_PlanProperties, +) -> RResult, RStr<'static>> { + let private_data = properties.private_data as *const PlanPropertiesPrivateData; + let props = &(*private_data).props; + + let codec = DefaultPhysicalExtensionCodec {}; + let output_ordering = + match props.output_ordering() { + Some(ordering) => { + let physical_sort_expr_nodes = + match serialize_physical_sort_exprs(ordering.to_owned(), &codec) { + Ok(v) => v, + Err(_) => return RErr( + "unable to serialize output_ordering in FFI_PlanProperties" + .into(), + ), + }; + + let ordering_data = PhysicalSortExprNodeCollection { + physical_sort_expr_nodes, + }; + + ordering_data.encode_to_vec() + } + None => Vec::default(), + }; + ROk(output_ordering.into()) +} + +unsafe extern "C" fn schema_fn_wrapper(properties: &FFI_PlanProperties) -> WrappedSchema { + let private_data = properties.private_data as *const PlanPropertiesPrivateData; + let props = &(*private_data).props; + + let schema: SchemaRef = Arc::clone(props.eq_properties.schema()); + schema.into() +} + +unsafe extern "C" fn release_fn_wrapper(props: &mut FFI_PlanProperties) { + let private_data = + Box::from_raw(props.private_data as *mut PlanPropertiesPrivateData); + drop(private_data); +} + +impl Drop for FFI_PlanProperties { + fn drop(&mut self) { + unsafe { (self.release)(self) } + } +} + +impl From<&PlanProperties> for FFI_PlanProperties { + fn from(props: &PlanProperties) -> Self { + let private_data = Box::new(PlanPropertiesPrivateData { + props: props.clone(), + }); + + FFI_PlanProperties { + output_partitioning: output_partitioning_fn_wrapper, + execution_mode: execution_mode_fn_wrapper, + output_ordering: output_ordering_fn_wrapper, + schema: schema_fn_wrapper, + release: release_fn_wrapper, + private_data: Box::into_raw(private_data) as *mut c_void, + } + } +} + +impl TryFrom for PlanProperties { + type Error = DataFusionError; + + fn try_from(ffi_props: FFI_PlanProperties) -> Result { + let ffi_schema = unsafe { (ffi_props.schema)(&ffi_props) }; + let schema = (&ffi_schema.0).try_into()?; + + // TODO Extend FFI to get the registry and codex + let default_ctx = SessionContext::new(); + let codex = DefaultPhysicalExtensionCodec {}; + + let ffi_orderings = unsafe { (ffi_props.output_ordering)(&ffi_props) }; + let orderings = match ffi_orderings { + ROk(ordering_vec) => { + let proto_output_ordering = + PhysicalSortExprNodeCollection::decode(ordering_vec.as_ref()) + .map_err(|e| DataFusionError::External(Box::new(e)))?; + 
Some(parse_physical_sort_exprs( + &proto_output_ordering.physical_sort_expr_nodes, + &default_ctx, + &schema, + &codex, + )?) + } + RErr(e) => return Err(DataFusionError::Plan(e.to_string())), + }; + + let ffi_partitioning = unsafe { (ffi_props.output_partitioning)(&ffi_props) }; + let partitioning = match ffi_partitioning { + ROk(partitioning_vec) => { + let proto_output_partitioning = + Partitioning::decode(partitioning_vec.as_ref()) + .map_err(|e| DataFusionError::External(Box::new(e)))?; + parse_protobuf_partitioning( + Some(&proto_output_partitioning), + &default_ctx, + &schema, + &codex, + )? + .ok_or(DataFusionError::Plan( + "Unable to deserialize partitioning protobuf in FFI_PlanProperties" + .to_string(), + )) + } + RErr(e) => Err(DataFusionError::Plan(e.to_string())), + }?; + + let execution_mode: ExecutionMode = + unsafe { (ffi_props.execution_mode)(&ffi_props).into() }; + + let eq_properties = match orderings { + Some(ordering) => { + EquivalenceProperties::new_with_orderings(Arc::new(schema), &[ordering]) + } + None => EquivalenceProperties::new(Arc::new(schema)), + }; + + Ok(PlanProperties::new( + eq_properties, + partitioning, + execution_mode, + )) + } +} + +/// FFI safe version of [`ExecutionMode`]. +#[repr(C)] +#[allow(non_camel_case_types)] +#[derive(Clone, StableAbi)] +pub enum FFI_ExecutionMode { + Bounded, + Unbounded, + PipelineBreaking, +} + +impl From for FFI_ExecutionMode { + fn from(value: ExecutionMode) -> Self { + match value { + ExecutionMode::Bounded => FFI_ExecutionMode::Bounded, + ExecutionMode::Unbounded => FFI_ExecutionMode::Unbounded, + ExecutionMode::PipelineBreaking => FFI_ExecutionMode::PipelineBreaking, + } + } +} + +impl From for ExecutionMode { + fn from(value: FFI_ExecutionMode) -> Self { + match value { + FFI_ExecutionMode::Bounded => ExecutionMode::Bounded, + FFI_ExecutionMode::Unbounded => ExecutionMode::Unbounded, + FFI_ExecutionMode::PipelineBreaking => ExecutionMode::PipelineBreaking, + } + } +} + +#[cfg(test)] +mod tests { + use datafusion::physical_plan::Partitioning; + + use super::*; + + #[test] + fn test_round_trip_ffi_plan_properties() -> Result<()> { + use arrow::datatypes::{DataType, Field, Schema}; + let schema = + Arc::new(Schema::new(vec![Field::new("a", DataType::Float32, false)])); + + let original_props = PlanProperties::new( + EquivalenceProperties::new(schema), + Partitioning::UnknownPartitioning(3), + ExecutionMode::Unbounded, + ); + + let local_props_ptr = FFI_PlanProperties::from(&original_props); + + let foreign_props: PlanProperties = local_props_ptr.try_into()?; + + assert!(format!("{:?}", foreign_props) == format!("{:?}", original_props)); + + Ok(()) + } +} diff --git a/datafusion/ffi/src/record_batch_stream.rs b/datafusion/ffi/src/record_batch_stream.rs new file mode 100644 index 000000000000..c944e56c5cde --- /dev/null +++ b/datafusion/ffi/src/record_batch_stream.rs @@ -0,0 +1,176 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::{ffi::c_void, task::Poll}; + +use abi_stable::{ + std_types::{ROption, RResult, RString}, + StableAbi, +}; +use arrow::array::{Array, RecordBatch}; +use arrow::{ + array::{make_array, StructArray}, + ffi::{from_ffi, to_ffi}, +}; +use async_ffi::{ContextExt, FfiContext, FfiPoll}; +use datafusion::error::Result; +use datafusion::{ + error::DataFusionError, + execution::{RecordBatchStream, SendableRecordBatchStream}, +}; +use futures::{Stream, TryStreamExt}; + +use crate::arrow_wrappers::{WrappedArray, WrappedSchema}; + +/// A stable struct for sharing [`RecordBatchStream`] across FFI boundaries. +/// We use the async-ffi crate for handling async calls across libraries. +#[repr(C)] +#[derive(Debug, StableAbi)] +#[allow(non_camel_case_types)] +pub struct FFI_RecordBatchStream { + /// This mirrors the `poll_next` of [`RecordBatchStream`] but does so + /// in a FFI safe manner. + pub poll_next: + unsafe extern "C" fn( + stream: &Self, + cx: &mut FfiContext, + ) -> FfiPoll>>, + + /// Return the schema of the record batch + pub schema: unsafe extern "C" fn(stream: &Self) -> WrappedSchema, + + /// Internal data. This is only to be accessed by the provider of the plan. + /// The foreign library should never attempt to access this data. + pub private_data: *mut c_void, +} + +impl From for FFI_RecordBatchStream { + fn from(stream: SendableRecordBatchStream) -> Self { + FFI_RecordBatchStream { + poll_next: poll_next_fn_wrapper, + schema: schema_fn_wrapper, + private_data: Box::into_raw(Box::new(stream)) as *mut c_void, + } + } +} + +unsafe impl Send for FFI_RecordBatchStream {} + +unsafe extern "C" fn schema_fn_wrapper(stream: &FFI_RecordBatchStream) -> WrappedSchema { + let stream = stream.private_data as *const SendableRecordBatchStream; + + (*stream).schema().into() +} + +fn record_batch_to_wrapped_array( + record_batch: RecordBatch, +) -> RResult { + let struct_array = StructArray::from(record_batch); + match to_ffi(&struct_array.to_data()) { + Ok((array, schema)) => RResult::ROk(WrappedArray { + array, + schema: WrappedSchema(schema), + }), + Err(e) => RResult::RErr(e.to_string().into()), + } +} + +// probably want to use pub unsafe fn from_ffi(array: FFI_ArrowArray, schema: &FFI_ArrowSchema) -> Result { +fn maybe_record_batch_to_wrapped_stream( + record_batch: Option>, +) -> ROption> { + match record_batch { + Some(Ok(record_batch)) => { + ROption::RSome(record_batch_to_wrapped_array(record_batch)) + } + Some(Err(e)) => ROption::RSome(RResult::RErr(e.to_string().into())), + None => ROption::RNone, + } +} + +unsafe extern "C" fn poll_next_fn_wrapper( + stream: &FFI_RecordBatchStream, + cx: &mut FfiContext, +) -> FfiPoll>> { + let stream = stream.private_data as *mut SendableRecordBatchStream; + + let poll_result = cx.with_context(|std_cx| { + (*stream) + .try_poll_next_unpin(std_cx) + .map(maybe_record_batch_to_wrapped_stream) + }); + + poll_result.into() +} + +impl RecordBatchStream for FFI_RecordBatchStream { + fn schema(&self) -> arrow::datatypes::SchemaRef { + let wrapped_schema = unsafe { (self.schema)(self) }; + wrapped_schema.into() + } +} + +fn 
wrapped_array_to_record_batch(array: WrappedArray) -> Result { + let array_data = + unsafe { from_ffi(array.array, &array.schema.0).map_err(DataFusionError::from)? }; + let array = make_array(array_data); + let struct_array = array + .as_any() + .downcast_ref::() + .ok_or(DataFusionError::Execution( + "Unexpected array type during record batch collection in FFI_RecordBatchStream" + .to_string(), + ))?; + + Ok(struct_array.into()) +} + +fn maybe_wrapped_array_to_record_batch( + array: ROption>, +) -> Option> { + match array { + ROption::RSome(RResult::ROk(wrapped_array)) => { + Some(wrapped_array_to_record_batch(wrapped_array)) + } + ROption::RSome(RResult::RErr(e)) => { + Some(Err(DataFusionError::Execution(e.to_string()))) + } + ROption::RNone => None, + } +} + +impl Stream for FFI_RecordBatchStream { + type Item = Result; + + fn poll_next( + self: std::pin::Pin<&mut Self>, + cx: &mut std::task::Context<'_>, + ) -> Poll> { + let poll_result = + unsafe { cx.with_ffi_context(|ffi_cx| (self.poll_next)(&self, ffi_cx)) }; + + match poll_result { + FfiPoll::Ready(array) => { + Poll::Ready(maybe_wrapped_array_to_record_batch(array)) + } + FfiPoll::Pending => Poll::Pending, + FfiPoll::Panicked => Poll::Ready(Some(Err(DataFusionError::Execution( + "Error occurred during poll_next on FFI_RecordBatchStream".to_string(), + )))), + } + } +} diff --git a/datafusion/ffi/src/session_config.rs b/datafusion/ffi/src/session_config.rs new file mode 100644 index 000000000000..aea03cf94e0a --- /dev/null +++ b/datafusion/ffi/src/session_config.rs @@ -0,0 +1,187 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::{ + collections::HashMap, + ffi::{c_char, c_void, CString}, +}; + +use abi_stable::{ + std_types::{RHashMap, RString}, + StableAbi, +}; +use datafusion::{config::ConfigOptions, error::Result}; +use datafusion::{error::DataFusionError, prelude::SessionConfig}; + +/// A stable struct for sharing [`SessionConfig`] across FFI boundaries. +/// Instead of attempting to expose the entire SessionConfig interface, we +/// convert the config options into a map from a string to string and pass +/// those values across the FFI boundary. On the receiver side, we +/// reconstruct a SessionConfig from those values. +/// +/// It is possible that using different versions of DataFusion across the +/// FFI boundary could have differing expectations of the config options. +/// This is a limitation of this approach, but exposing the entire +/// SessionConfig via a FFI interface would be extensive and provide limited +/// value over this version. +#[repr(C)] +#[derive(Debug, StableAbi)] +#[allow(non_camel_case_types)] +pub struct FFI_SessionConfig { + /// Return a hash map from key to value of the config options represented + /// by string values. 
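+    ///
+    /// For example, one entry might map `datafusion.execution.batch_size` to
+    /// `"8192"` (illustrative only; the available keys and their defaults
+    /// depend on the DataFusion version on the provider side).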
+ pub config_options: unsafe extern "C" fn(config: &Self) -> RHashMap, + + /// Used to create a clone on the provider of the execution plan. This should + /// only need to be called by the receiver of the plan. + pub clone: unsafe extern "C" fn(plan: &Self) -> Self, + + /// Release the memory of the private data when it is no longer being used. + pub release: unsafe extern "C" fn(arg: &mut Self), + + /// Internal data. This is only to be accessed by the provider of the plan. + /// A [`ForeignSessionConfig`] should never attempt to access this data. + pub private_data: *mut c_void, +} + +unsafe impl Send for FFI_SessionConfig {} +unsafe impl Sync for FFI_SessionConfig {} + +unsafe extern "C" fn config_options_fn_wrapper( + config: &FFI_SessionConfig, +) -> RHashMap { + let private_data = config.private_data as *mut SessionConfigPrivateData; + let config_options = &(*private_data).config; + + let mut options = RHashMap::default(); + for config_entry in config_options.entries() { + if let Some(value) = config_entry.value { + options.insert(config_entry.key.into(), value.into()); + } + } + + options +} + +unsafe extern "C" fn release_fn_wrapper(config: &mut FFI_SessionConfig) { + let private_data = + Box::from_raw(config.private_data as *mut SessionConfigPrivateData); + drop(private_data); +} + +unsafe extern "C" fn clone_fn_wrapper(config: &FFI_SessionConfig) -> FFI_SessionConfig { + let old_private_data = config.private_data as *mut SessionConfigPrivateData; + let old_config = &(*old_private_data).config; + + let private_data = Box::new(SessionConfigPrivateData { + config: old_config.clone(), + }); + + FFI_SessionConfig { + config_options: config_options_fn_wrapper, + private_data: Box::into_raw(private_data) as *mut c_void, + clone: clone_fn_wrapper, + release: release_fn_wrapper, + } +} + +struct SessionConfigPrivateData { + pub config: ConfigOptions, +} + +impl From<&SessionConfig> for FFI_SessionConfig { + fn from(session: &SessionConfig) -> Self { + let mut config_keys = Vec::new(); + let mut config_values = Vec::new(); + for config_entry in session.options().entries() { + if let Some(value) = config_entry.value { + let key_cstr = CString::new(config_entry.key).unwrap_or_default(); + let key_ptr = key_cstr.into_raw() as *const c_char; + config_keys.push(key_ptr); + + config_values + .push(CString::new(value).unwrap_or_default().into_raw() + as *const c_char); + } + } + + let private_data = Box::new(SessionConfigPrivateData { + config: session.options().clone(), + }); + + Self { + config_options: config_options_fn_wrapper, + private_data: Box::into_raw(private_data) as *mut c_void, + clone: clone_fn_wrapper, + release: release_fn_wrapper, + } + } +} + +impl Clone for FFI_SessionConfig { + fn clone(&self) -> Self { + unsafe { (self.clone)(self) } + } +} + +impl Drop for FFI_SessionConfig { + fn drop(&mut self) { + unsafe { (self.release)(self) }; + } +} + +/// A wrapper struct for accessing [`SessionConfig`] across a FFI boundary. 
+/// The [`SessionConfig`] will be generated from a hash map of the config +/// options in the provider and will be reconstructed on this side of the +/// interface.s +pub struct ForeignSessionConfig(pub SessionConfig); + +impl TryFrom<&FFI_SessionConfig> for ForeignSessionConfig { + type Error = DataFusionError; + + fn try_from(config: &FFI_SessionConfig) -> Result { + let config_options = unsafe { (config.config_options)(config) }; + + let mut options_map = HashMap::new(); + config_options.iter().for_each(|kv_pair| { + options_map.insert(kv_pair.0.to_string(), kv_pair.1.to_string()); + }); + + Ok(Self(SessionConfig::from_string_hash_map(&options_map)?)) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_round_trip_ffi_session_config() -> Result<()> { + let session_config = SessionConfig::new(); + let original_options = session_config.options().entries(); + + let ffi_config: FFI_SessionConfig = (&session_config).into(); + + let foreign_config: ForeignSessionConfig = (&ffi_config).try_into()?; + + let returned_options = foreign_config.0.options().entries(); + + assert!(original_options.len() == returned_options.len()); + + Ok(()) + } +} diff --git a/datafusion/ffi/src/table_provider.rs b/datafusion/ffi/src/table_provider.rs new file mode 100644 index 000000000000..011ad96e423d --- /dev/null +++ b/datafusion/ffi/src/table_provider.rs @@ -0,0 +1,443 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::{any::Any, ffi::c_void, sync::Arc}; + +use abi_stable::{ + std_types::{ROption, RResult, RString, RVec}, + StableAbi, +}; +use arrow::datatypes::SchemaRef; +use async_ffi::{FfiFuture, FutureExt}; +use async_trait::async_trait; +use datafusion::{ + catalog::{Session, TableProvider}, + datasource::TableType, + error::DataFusionError, + execution::session_state::SessionStateBuilder, + logical_expr::TableProviderFilterPushDown, + physical_plan::ExecutionPlan, + prelude::{Expr, SessionContext}, +}; +use datafusion_proto::{ + logical_plan::{ + from_proto::parse_exprs, to_proto::serialize_exprs, DefaultLogicalExtensionCodec, + }, + protobuf::LogicalExprList, +}; +use prost::Message; + +use crate::{ + arrow_wrappers::WrappedSchema, + session_config::ForeignSessionConfig, + table_source::{FFI_TableProviderFilterPushDown, FFI_TableType}, +}; + +use super::{ + execution_plan::{FFI_ExecutionPlan, ForeignExecutionPlan}, + session_config::FFI_SessionConfig, +}; +use datafusion::error::Result; + +/// A stable struct for sharing [`TableProvider`] across FFI boundaries. +#[repr(C)] +#[derive(Debug, StableAbi)] +#[allow(non_camel_case_types)] +pub struct FFI_TableProvider { + /// Return the table schema + pub schema: unsafe extern "C" fn(provider: &Self) -> WrappedSchema, + + /// Perform a scan on the table. 
See [`TableProvider`] for detailed usage information. + /// + /// # Arguments + /// + /// * `provider` - the table provider + /// * `session_config` - session configuration + /// * `projections` - if specified, only a subset of the columns are returned + /// * `filters_serialized` - filters to apply to the scan, which are a + /// [`LogicalExprList`] protobuf message serialized into bytes to pass + /// across the FFI boundary. + /// * `limit` - if specified, limit the number of rows returned + pub scan: unsafe extern "C" fn( + provider: &Self, + session_config: &FFI_SessionConfig, + projections: RVec, + filters_serialized: RVec, + limit: ROption, + ) -> FfiFuture>, + + /// Return the type of table. See [`TableType`] for options. + pub table_type: unsafe extern "C" fn(provider: &Self) -> FFI_TableType, + + /// Based upon the input filters, identify which are supported. The filters + /// are a [`LogicalExprList`] protobuf message serialized into bytes to pass + /// across the FFI boundary. + pub supports_filters_pushdown: Option< + unsafe extern "C" fn( + provider: &FFI_TableProvider, + filters_serialized: RVec, + ) + -> RResult, RString>, + >, + + /// Used to create a clone on the provider of the execution plan. This should + /// only need to be called by the receiver of the plan. + pub clone: unsafe extern "C" fn(plan: &Self) -> Self, + + /// Release the memory of the private data when it is no longer being used. + pub release: unsafe extern "C" fn(arg: &mut Self), + + /// Internal data. This is only to be accessed by the provider of the plan. + /// A [`ForeignExecutionPlan`] should never attempt to access this data. + pub private_data: *mut c_void, +} + +unsafe impl Send for FFI_TableProvider {} +unsafe impl Sync for FFI_TableProvider {} + +struct ProviderPrivateData { + provider: Arc, +} + +unsafe extern "C" fn schema_fn_wrapper(provider: &FFI_TableProvider) -> WrappedSchema { + let private_data = provider.private_data as *const ProviderPrivateData; + let provider = &(*private_data).provider; + + provider.schema().into() +} + +unsafe extern "C" fn table_type_fn_wrapper( + provider: &FFI_TableProvider, +) -> FFI_TableType { + let private_data = provider.private_data as *const ProviderPrivateData; + let provider = &(*private_data).provider; + + provider.table_type().into() +} + +fn supports_filters_pushdown_internal( + provider: &Arc, + filters_serialized: &[u8], +) -> Result> { + let default_ctx = SessionContext::new(); + let codec = DefaultLogicalExtensionCodec {}; + + let filters = match filters_serialized.is_empty() { + true => vec![], + false => { + let proto_filters = LogicalExprList::decode(filters_serialized) + .map_err(|e| DataFusionError::Plan(e.to_string()))?; + + parse_exprs(proto_filters.expr.iter(), &default_ctx, &codec)? + } + }; + let filters_borrowed: Vec<&Expr> = filters.iter().collect(); + + let results: RVec<_> = provider + .supports_filters_pushdown(&filters_borrowed)? 
+ .iter() + .map(|v| v.into()) + .collect(); + + Ok(results) +} + +unsafe extern "C" fn supports_filters_pushdown_fn_wrapper( + provider: &FFI_TableProvider, + filters_serialized: RVec, +) -> RResult, RString> { + let private_data = provider.private_data as *const ProviderPrivateData; + let provider = &(*private_data).provider; + + supports_filters_pushdown_internal(provider, &filters_serialized) + .map_err(|e| e.to_string().into()) + .into() +} + +unsafe extern "C" fn scan_fn_wrapper( + provider: &FFI_TableProvider, + session_config: &FFI_SessionConfig, + projections: RVec, + filters_serialized: RVec, + limit: ROption, +) -> FfiFuture> { + let private_data = provider.private_data as *mut ProviderPrivateData; + let internal_provider = &(*private_data).provider; + let session_config = session_config.clone(); + + async move { + let config = match ForeignSessionConfig::try_from(&session_config) { + Ok(c) => c, + Err(e) => return RResult::RErr(e.to_string().into()), + }; + let session = SessionStateBuilder::new() + .with_default_features() + .with_config(config.0) + .build(); + let ctx = SessionContext::new_with_state(session); + + let filters = match filters_serialized.is_empty() { + true => vec![], + false => { + let default_ctx = SessionContext::new(); + let codec = DefaultLogicalExtensionCodec {}; + + let proto_filters = + match LogicalExprList::decode(filters_serialized.as_ref()) { + Ok(f) => f, + Err(e) => return RResult::RErr(e.to_string().into()), + }; + + match parse_exprs(proto_filters.expr.iter(), &default_ctx, &codec) { + Ok(f) => f, + Err(e) => return RResult::RErr(e.to_string().into()), + } + } + }; + + let projections: Vec<_> = projections.into_iter().collect(); + let maybe_projections = match projections.is_empty() { + true => None, + false => Some(&projections), + }; + + let plan = match internal_provider + .scan(&ctx.state(), maybe_projections, &filters, limit.into()) + .await + { + Ok(p) => p, + Err(e) => return RResult::RErr(e.to_string().into()), + }; + + RResult::ROk(FFI_ExecutionPlan::new(plan, ctx.task_ctx())) + } + .into_ffi() +} + +unsafe extern "C" fn release_fn_wrapper(provider: &mut FFI_TableProvider) { + let private_data = Box::from_raw(provider.private_data as *mut ProviderPrivateData); + drop(private_data); +} + +unsafe extern "C" fn clone_fn_wrapper(provider: &FFI_TableProvider) -> FFI_TableProvider { + let old_private_data = provider.private_data as *const ProviderPrivateData; + + let private_data = Box::into_raw(Box::new(ProviderPrivateData { + provider: Arc::clone(&(*old_private_data).provider), + })) as *mut c_void; + + FFI_TableProvider { + schema: schema_fn_wrapper, + scan: scan_fn_wrapper, + table_type: table_type_fn_wrapper, + supports_filters_pushdown: provider.supports_filters_pushdown, + clone: clone_fn_wrapper, + release: release_fn_wrapper, + private_data, + } +} + +impl Drop for FFI_TableProvider { + fn drop(&mut self) { + unsafe { (self.release)(self) } + } +} + +impl FFI_TableProvider { + /// Creates a new [`FFI_TableProvider`]. 
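+    ///
+    /// A minimal usage sketch, assuming `provider` is an `Arc` of any
+    /// `TableProvider` implementation (a `MemTable` in the test below):
+    /// `let ffi_provider = FFI_TableProvider::new(provider, true);`
+    ///
+    /// Passing `false` as the second argument leaves `supports_filters_pushdown`
+    /// as `None`, so the receiving side reports every filter as unsupported.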
+ pub fn new( + provider: Arc, + can_support_pushdown_filters: bool, + ) -> Self { + let private_data = Box::new(ProviderPrivateData { provider }); + + Self { + schema: schema_fn_wrapper, + scan: scan_fn_wrapper, + table_type: table_type_fn_wrapper, + supports_filters_pushdown: match can_support_pushdown_filters { + true => Some(supports_filters_pushdown_fn_wrapper), + false => None, + }, + clone: clone_fn_wrapper, + release: release_fn_wrapper, + private_data: Box::into_raw(private_data) as *mut c_void, + } + } +} + +/// This wrapper struct exists on the reciever side of the FFI interface, so it has +/// no guarantees about being able to access the data in `private_data`. Any functions +/// defined on this struct must only use the stable functions provided in +/// FFI_TableProvider to interact with the foreign table provider. +#[derive(Debug)] +pub struct ForeignTableProvider(FFI_TableProvider); + +unsafe impl Send for ForeignTableProvider {} +unsafe impl Sync for ForeignTableProvider {} + +impl From<&FFI_TableProvider> for ForeignTableProvider { + fn from(provider: &FFI_TableProvider) -> Self { + Self(provider.clone()) + } +} + +impl Clone for FFI_TableProvider { + fn clone(&self) -> Self { + unsafe { (self.clone)(self) } + } +} + +#[async_trait] +impl TableProvider for ForeignTableProvider { + fn as_any(&self) -> &dyn Any { + self + } + + fn schema(&self) -> SchemaRef { + let wrapped_schema = unsafe { (self.0.schema)(&self.0) }; + wrapped_schema.into() + } + + fn table_type(&self) -> TableType { + unsafe { (self.0.table_type)(&self.0).into() } + } + + async fn scan( + &self, + session: &dyn Session, + projection: Option<&Vec>, + filters: &[Expr], + limit: Option, + ) -> Result> { + let session_config: FFI_SessionConfig = session.config().into(); + + let projections: Option> = + projection.map(|p| p.iter().map(|v| v.to_owned()).collect()); + + let codec = DefaultLogicalExtensionCodec {}; + let filter_list = LogicalExprList { + expr: serialize_exprs(filters, &codec)?, + }; + let filters_serialized = filter_list.encode_to_vec().into(); + + let plan = unsafe { + let maybe_plan = (self.0.scan)( + &self.0, + &session_config, + projections.unwrap_or_default(), + filters_serialized, + limit.into(), + ) + .await; + + match maybe_plan { + RResult::ROk(p) => ForeignExecutionPlan::try_from(&p)?, + RResult::RErr(_) => { + return Err(DataFusionError::Internal( + "Unable to perform scan via FFI".to_string(), + )) + } + } + }; + + Ok(Arc::new(plan)) + } + + /// Tests whether the table provider can make use of a filter expression + /// to optimise data retrieval. 
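+    ///
+    /// If the provider was wrapped without pushdown support, every filter is
+    /// reported as `TableProviderFilterPushDown::Unsupported`; for example, two
+    /// input filters produce a vector of two `Unsupported` entries.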
+ fn supports_filters_pushdown( + &self, + filters: &[&Expr], + ) -> Result> { + unsafe { + let pushdown_fn = match self.0.supports_filters_pushdown { + Some(func) => func, + None => { + return Ok(vec![ + TableProviderFilterPushDown::Unsupported; + filters.len() + ]) + } + }; + + let codec = DefaultLogicalExtensionCodec {}; + + let expr_list = LogicalExprList { + expr: serialize_exprs(filters.iter().map(|f| f.to_owned()), &codec)?, + }; + let serialized_filters = expr_list.encode_to_vec(); + + let pushdowns = pushdown_fn(&self.0, serialized_filters.into()); + + match pushdowns { + RResult::ROk(p) => Ok(p.iter().map(|v| v.into()).collect()), + RResult::RErr(e) => Err(DataFusionError::Plan(e.to_string())), + } + } + } +} + +#[cfg(test)] +mod tests { + use arrow::datatypes::Schema; + use datafusion::prelude::{col, lit}; + + use super::*; + + #[tokio::test] + async fn test_round_trip_ffi_table_provider() -> Result<()> { + use arrow::datatypes::Field; + use datafusion::arrow::{ + array::Float32Array, datatypes::DataType, record_batch::RecordBatch, + }; + use datafusion::datasource::MemTable; + + let schema = + Arc::new(Schema::new(vec![Field::new("a", DataType::Float32, false)])); + + // define data in two partitions + let batch1 = RecordBatch::try_new( + Arc::clone(&schema), + vec![Arc::new(Float32Array::from(vec![2.0, 4.0, 8.0]))], + )?; + let batch2 = RecordBatch::try_new( + Arc::clone(&schema), + vec![Arc::new(Float32Array::from(vec![64.0]))], + )?; + + let ctx = SessionContext::new(); + + let provider = + Arc::new(MemTable::try_new(schema, vec![vec![batch1], vec![batch2]])?); + + let ffi_provider = FFI_TableProvider::new(provider, true); + + let foreign_table_provider: ForeignTableProvider = (&ffi_provider).into(); + + ctx.register_table("t", Arc::new(foreign_table_provider))?; + + let df = ctx.table("t").await?; + + df.select(vec![col("a")])? + .filter(col("a").gt(lit(3.0)))? + .show() + .await?; + + Ok(()) + } +} diff --git a/datafusion/ffi/src/table_source.rs b/datafusion/ffi/src/table_source.rs new file mode 100644 index 000000000000..a59836622ee6 --- /dev/null +++ b/datafusion/ffi/src/table_source.rs @@ -0,0 +1,87 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use abi_stable::StableAbi; +use datafusion::{datasource::TableType, logical_expr::TableProviderFilterPushDown}; + +/// FFI safe version of [`TableProviderFilterPushDown`]. 
+#[repr(C)] +#[derive(StableAbi)] +#[allow(non_camel_case_types)] +pub enum FFI_TableProviderFilterPushDown { + Unsupported, + Inexact, + Exact, +} + +impl From<&FFI_TableProviderFilterPushDown> for TableProviderFilterPushDown { + fn from(value: &FFI_TableProviderFilterPushDown) -> Self { + match value { + FFI_TableProviderFilterPushDown::Unsupported => { + TableProviderFilterPushDown::Unsupported + } + FFI_TableProviderFilterPushDown::Inexact => { + TableProviderFilterPushDown::Inexact + } + FFI_TableProviderFilterPushDown::Exact => TableProviderFilterPushDown::Exact, + } + } +} + +impl From<&TableProviderFilterPushDown> for FFI_TableProviderFilterPushDown { + fn from(value: &TableProviderFilterPushDown) -> Self { + match value { + TableProviderFilterPushDown::Unsupported => { + FFI_TableProviderFilterPushDown::Unsupported + } + TableProviderFilterPushDown::Inexact => { + FFI_TableProviderFilterPushDown::Inexact + } + TableProviderFilterPushDown::Exact => FFI_TableProviderFilterPushDown::Exact, + } + } +} + +/// FFI safe version of [`TableType`]. +#[repr(C)] +#[allow(non_camel_case_types)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, StableAbi)] +pub enum FFI_TableType { + Base, + View, + Temporary, +} + +impl From for TableType { + fn from(value: FFI_TableType) -> Self { + match value { + FFI_TableType::Base => TableType::Base, + FFI_TableType::View => TableType::View, + FFI_TableType::Temporary => TableType::Temporary, + } + } +} + +impl From for FFI_TableType { + fn from(value: TableType) -> Self { + match value { + TableType::Base => FFI_TableType::Base, + TableType::View => FFI_TableType::View, + TableType::Temporary => FFI_TableType::Temporary, + } + } +} From 497582906e826d3045e2c7b9d4ddb44ce6d42adf Mon Sep 17 00:00:00 2001 From: Piotr Findeisen Date: Wed, 30 Oct 2024 22:40:51 +0100 Subject: [PATCH 11/32] Report file location and offset when CSV schema mismatch (#13185) --- .../core/src/datasource/file_format/csv.rs | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/datafusion/core/src/datasource/file_format/csv.rs b/datafusion/core/src/datasource/file_format/csv.rs index 3cb5ae4f85ca..2aaef2cda1c8 100644 --- a/datafusion/core/src/datasource/file_format/csv.rs +++ b/datafusion/core/src/datasource/file_format/csv.rs @@ -325,7 +325,13 @@ impl FileFormat for CsvFormat { let stream = self.read_to_delimited_chunks(store, object).await; let (schema, records_read) = self .infer_schema_from_stream(state, records_to_read, stream) - .await?; + .await + .map_err(|err| { + DataFusionError::Context( + format!("Error when processing CSV file {}", &object.location), + Box::new(err), + ) + })?; records_to_read -= records_read; schemas.push(schema); if records_to_read == 0 { @@ -433,11 +439,13 @@ impl CsvFormat { let mut total_records_read = 0; let mut column_names = vec![]; let mut column_type_possibilities = vec![]; - let mut first_chunk = true; + let mut record_number = -1; pin_mut!(stream); while let Some(chunk) = stream.next().await.transpose()? { + record_number += 1; + let first_chunk = record_number == 0; let mut format = arrow::csv::reader::Format::default() .with_header( first_chunk @@ -471,14 +479,14 @@ impl CsvFormat { (field.name().clone(), possibilities) }) .unzip(); - first_chunk = false; } else { if fields.len() != column_type_possibilities.len() { return exec_err!( "Encountered unequal lengths between records on CSV file whilst inferring schema. 
\ - Expected {} records, found {} records", + Expected {} fields, found {} fields at record {}", column_type_possibilities.len(), - fields.len() + fields.len(), + record_number + 1 ); } From f23360f1e0f90abcf92a067de55a9053da545ed2 Mon Sep 17 00:00:00 2001 From: Jay Zhan Date: Thu, 31 Oct 2024 07:37:41 +0800 Subject: [PATCH 12/32] Round robin polling between tied winners in sort preserving merge (#13133) * first draft Signed-off-by: jayzhan211 * add data Signed-off-by: jayzhan211 * fix benchmark Signed-off-by: jayzhan211 * add more bencmark data Signed-off-by: jayzhan211 * fix benchmark Signed-off-by: jayzhan211 * fmt Signed-off-by: jayzhan211 * get max size Signed-off-by: jayzhan211 * add license Signed-off-by: jayzhan211 * rm code for merge Signed-off-by: jayzhan211 * cleanup Signed-off-by: jayzhan211 * cleanup Signed-off-by: jayzhan211 * update poll count only we have tie Signed-off-by: jayzhan211 * upd comment Signed-off-by: jayzhan211 * fix logic Signed-off-by: jayzhan211 * configurable Signed-off-by: jayzhan211 * fmt Signed-off-by: jayzhan211 * add mem limit test Signed-off-by: jayzhan211 * rm test Signed-off-by: jayzhan211 * escape bracket Signed-off-by: jayzhan211 * add test Signed-off-by: jayzhan211 * rm per consumer record Signed-off-by: jayzhan211 * repartition limit Signed-off-by: jayzhan211 * add benchmark Signed-off-by: jayzhan211 * cleanup Signed-off-by: jayzhan211 * benchmark with parameter Signed-off-by: jayzhan211 * only calculate consumer pool if the limit is set Signed-off-by: jayzhan211 * combine eq and gt Signed-off-by: jayzhan211 * review part 1 * Update merge.rs * upd doc Signed-off-by: jayzhan211 * no need index comparison Signed-off-by: jayzhan211 * combine handle tie and eq check Signed-off-by: jayzhan211 * upd doc Signed-off-by: jayzhan211 * fmt Signed-off-by: jayzhan211 * add more comment Signed-off-by: jayzhan211 * remove flag Signed-off-by: jayzhan211 * upd comment Signed-off-by: jayzhan211 * Revert "remove flag" This reverts commit 8d6c0a6de540ed60c387c0d7abf124309922e159. * Revert "upd comment" This reverts commit a18cba8a61f089313d96ed84940ca17d9c7a7b2d. 
* add more comment Signed-off-by: jayzhan211 * add more comment Signed-off-by: jayzhan211 * fmt Signed-off-by: jayzhan211 * simpliy mem pool Signed-off-by: jayzhan211 * clippy Signed-off-by: jayzhan211 * Update merge.rs * minor * add comment Signed-off-by: jayzhan211 --------- Signed-off-by: jayzhan211 Co-authored-by: berkaysynnada --- datafusion/physical-plan/Cargo.toml | 5 + datafusion/physical-plan/benches/spm.rs | 145 +++++++++++++ datafusion/physical-plan/src/sorts/cursor.rs | 54 +++++ datafusion/physical-plan/src/sorts/merge.rs | 199 ++++++++++++++++-- .../src/sorts/sort_preserving_merge.rs | 93 ++++++++ .../src/sorts/streaming_merge.rs | 29 ++- 6 files changed, 501 insertions(+), 24 deletions(-) create mode 100644 datafusion/physical-plan/benches/spm.rs diff --git a/datafusion/physical-plan/Cargo.toml b/datafusion/physical-plan/Cargo.toml index 7fcd719539ec..a9f9b22fafda 100644 --- a/datafusion/physical-plan/Cargo.toml +++ b/datafusion/physical-plan/Cargo.toml @@ -68,6 +68,7 @@ rand = { workspace = true } tokio = { workspace = true } [dev-dependencies] +criterion = { version = "0.5", features = ["async_futures"] } datafusion-functions-aggregate = { workspace = true } rstest = { workspace = true } rstest_reuse = "0.7.0" @@ -76,3 +77,7 @@ tokio = { workspace = true, features = [ "fs", "parking_lot", ] } + +[[bench]] +harness = false +name = "spm" diff --git a/datafusion/physical-plan/benches/spm.rs b/datafusion/physical-plan/benches/spm.rs new file mode 100644 index 000000000000..9cc703f5f726 --- /dev/null +++ b/datafusion/physical-plan/benches/spm.rs @@ -0,0 +1,145 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
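+
+// This benchmark is registered in `datafusion/physical-plan/Cargo.toml` as
+// `[[bench]] name = "spm"`, so it can be run on its own with something like
+// `cargo bench -p datafusion-physical-plan --bench spm` (assuming a standard
+// cargo workspace layout).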
+ +use std::sync::Arc; + +use arrow::record_batch::RecordBatch; +use arrow_array::{ArrayRef, Int32Array, Int64Array, StringArray}; +use datafusion_execution::TaskContext; +use datafusion_physical_expr::expressions::col; +use datafusion_physical_expr::PhysicalSortExpr; +use datafusion_physical_plan::memory::MemoryExec; +use datafusion_physical_plan::sorts::sort_preserving_merge::SortPreservingMergeExec; +use datafusion_physical_plan::{collect, ExecutionPlan}; + +use criterion::async_executor::FuturesExecutor; +use criterion::{black_box, criterion_group, criterion_main, Criterion}; + +fn generate_spm_for_round_robin_tie_breaker( + has_same_value: bool, + enable_round_robin_repartition: bool, + batch_count: usize, + partition_count: usize, +) -> SortPreservingMergeExec { + let row_size = 256; + let rb = if has_same_value { + let a: ArrayRef = Arc::new(Int32Array::from(vec![1; row_size])); + let b: ArrayRef = Arc::new(StringArray::from_iter(vec![Some("a"); row_size])); + let c: ArrayRef = Arc::new(Int64Array::from_iter(vec![0; row_size])); + RecordBatch::try_from_iter(vec![("a", a), ("b", b), ("c", c)]).unwrap() + } else { + let v = (0i32..row_size as i32).collect::>(); + let a: ArrayRef = Arc::new(Int32Array::from(v)); + + // Use alphanumeric characters + let charset: Vec = + "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789" + .chars() + .collect(); + + let mut strings = Vec::new(); + for i in 0..256 { + let mut s = String::new(); + s.push(charset[i % charset.len()]); + s.push(charset[(i / charset.len()) % charset.len()]); + strings.push(Some(s)); + } + + let b: ArrayRef = Arc::new(StringArray::from_iter(strings)); + + let v = (0i64..row_size as i64).collect::>(); + let c: ArrayRef = Arc::new(Int64Array::from_iter(v)); + RecordBatch::try_from_iter(vec![("a", a), ("b", b), ("c", c)]).unwrap() + }; + + let rbs = (0..batch_count).map(|_| rb.clone()).collect::>(); + let partitiones = vec![rbs.clone(); partition_count]; + + let schema = rb.schema(); + let sort = vec![ + PhysicalSortExpr { + expr: col("b", &schema).unwrap(), + options: Default::default(), + }, + PhysicalSortExpr { + expr: col("c", &schema).unwrap(), + options: Default::default(), + }, + ]; + + let exec = MemoryExec::try_new(&partitiones, schema, None).unwrap(); + SortPreservingMergeExec::new(sort, Arc::new(exec)) + .with_round_robin_repartition(enable_round_robin_repartition) +} + +fn run_bench( + c: &mut Criterion, + has_same_value: bool, + enable_round_robin_repartition: bool, + batch_count: usize, + partition_count: usize, + description: &str, +) { + let task_ctx = TaskContext::default(); + let task_ctx = Arc::new(task_ctx); + + let spm = Arc::new(generate_spm_for_round_robin_tie_breaker( + has_same_value, + enable_round_robin_repartition, + batch_count, + partition_count, + )) as Arc; + + c.bench_function(description, |b| { + b.to_async(FuturesExecutor) + .iter(|| black_box(collect(Arc::clone(&spm), Arc::clone(&task_ctx)))) + }); +} + +fn criterion_benchmark(c: &mut Criterion) { + let params = [ + (true, false, "low_card_without_tiebreaker"), // low cardinality, no tie breaker + (true, true, "low_card_with_tiebreaker"), // low cardinality, with tie breaker + (false, false, "high_card_without_tiebreaker"), // high cardinality, no tie breaker + (false, true, "high_card_with_tiebreaker"), // high cardinality, with tie breaker + ]; + + let batch_counts = [1, 25, 625]; + let partition_counts = [2, 8, 32]; + + for &(has_same_value, enable_round_robin_repartition, cardinality_label) in ¶ms { + for &batch_count in 
&batch_counts { + for &partition_count in &partition_counts { + let description = format!( + "{}_batch_count_{}_partition_count_{}", + cardinality_label, batch_count, partition_count + ); + run_bench( + c, + has_same_value, + enable_round_robin_repartition, + batch_count, + partition_count, + &description, + ); + } + } + } +} + +criterion_group!(benches, criterion_benchmark); +criterion_main!(benches); diff --git a/datafusion/physical-plan/src/sorts/cursor.rs b/datafusion/physical-plan/src/sorts/cursor.rs index df90c97faf68..133d736c1467 100644 --- a/datafusion/physical-plan/src/sorts/cursor.rs +++ b/datafusion/physical-plan/src/sorts/cursor.rs @@ -38,6 +38,10 @@ pub trait CursorValues { /// Returns true if `l[l_idx] == r[r_idx]` fn eq(l: &Self, l_idx: usize, r: &Self, r_idx: usize) -> bool; + /// Returns true if `row[idx] == row[idx - 1]` + /// Given `idx` should be greater than 0 + fn eq_to_previous(cursor: &Self, idx: usize) -> bool; + /// Returns comparison of `l[l_idx]` and `r[r_idx]` fn compare(l: &Self, l_idx: usize, r: &Self, r_idx: usize) -> Ordering; } @@ -95,6 +99,16 @@ impl Cursor { self.offset += 1; t } + + pub fn is_eq_to_prev_one(&self, prev_cursor: Option<&Cursor>) -> bool { + if self.offset > 0 { + self.is_eq_to_prev_row() + } else if let Some(prev_cursor) = prev_cursor { + self.is_eq_to_prev_row_in_prev_batch(prev_cursor) + } else { + false + } + } } impl PartialEq for Cursor { @@ -103,6 +117,22 @@ impl PartialEq for Cursor { } } +impl Cursor { + fn is_eq_to_prev_row(&self) -> bool { + T::eq_to_previous(&self.values, self.offset) + } + + fn is_eq_to_prev_row_in_prev_batch(&self, other: &Self) -> bool { + assert_eq!(self.offset, 0); + T::eq( + &self.values, + self.offset, + &other.values, + other.values.len() - 1, + ) + } +} + impl Eq for Cursor {} impl PartialOrd for Cursor { @@ -156,6 +186,11 @@ impl CursorValues for RowValues { l.rows.row(l_idx) == r.rows.row(r_idx) } + fn eq_to_previous(cursor: &Self, idx: usize) -> bool { + assert!(idx > 0); + cursor.rows.row(idx) == cursor.rows.row(idx - 1) + } + fn compare(l: &Self, l_idx: usize, r: &Self, r_idx: usize) -> Ordering { l.rows.row(l_idx).cmp(&r.rows.row(r_idx)) } @@ -188,6 +223,11 @@ impl CursorValues for PrimitiveValues { l.0[l_idx].is_eq(r.0[r_idx]) } + fn eq_to_previous(cursor: &Self, idx: usize) -> bool { + assert!(idx > 0); + cursor.0[idx].is_eq(cursor.0[idx - 1]) + } + fn compare(l: &Self, l_idx: usize, r: &Self, r_idx: usize) -> Ordering { l.0[l_idx].compare(r.0[r_idx]) } @@ -219,6 +259,11 @@ impl CursorValues for ByteArrayValues { l.value(l_idx) == r.value(r_idx) } + fn eq_to_previous(cursor: &Self, idx: usize) -> bool { + assert!(idx > 0); + cursor.value(idx) == cursor.value(idx - 1) + } + fn compare(l: &Self, l_idx: usize, r: &Self, r_idx: usize) -> Ordering { l.value(l_idx).cmp(r.value(r_idx)) } @@ -284,6 +329,15 @@ impl CursorValues for ArrayValues { } } + fn eq_to_previous(cursor: &Self, idx: usize) -> bool { + assert!(idx > 0); + match (cursor.is_null(idx), cursor.is_null(idx - 1)) { + (true, true) => true, + (false, false) => T::eq(&cursor.values, idx, &cursor.values, idx - 1), + _ => false, + } + } + fn compare(l: &Self, l_idx: usize, r: &Self, r_idx: usize) -> Ordering { match (l.is_null(l_idx), r.is_null(r_idx)) { (true, true) => Ordering::Equal, diff --git a/datafusion/physical-plan/src/sorts/merge.rs b/datafusion/physical-plan/src/sorts/merge.rs index e0644e3d99e5..458c1c29c0cf 100644 --- a/datafusion/physical-plan/src/sorts/merge.rs +++ b/datafusion/physical-plan/src/sorts/merge.rs @@ -97,6 +97,40 
@@ pub(crate) struct SortPreservingMergeStream { /// Cursors for each input partition. `None` means the input is exhausted cursors: Vec>>, + /// Configuration parameter to enable round-robin selection of tied winners of loser tree. + /// + /// To address the issue of unbalanced polling between partitions due to tie-breakers being based + /// on partition index, especially in cases of low cardinality, we are making changes to the winner + /// selection mechanism. Previously, partitions with smaller indices were consistently chosen as the winners, + /// leading to an uneven distribution of polling. This caused upstream operator buffers for the other partitions + /// to grow excessively, as they continued receiving data without consuming it. + /// + /// For example, an upstream operator like a repartition execution would keep sending data to certain partitions, + /// but those partitions wouldn't consume the data if they weren't selected as winners. This resulted in inefficient buffer usage. + /// + /// To resolve this, we are modifying the tie-breaking logic. Instead of always choosing the partition with the smallest index, + /// we now select the partition that has the fewest poll counts for the same value. + /// This ensures that multiple partitions with the same value are chosen equally, distributing the polling load in a round-robin fashion. + /// This approach balances the workload more effectively across partitions and avoids excessive buffer growth. + enable_round_robin_tie_breaker: bool, + + /// Flag indicating whether we are in the mode of round-robin + /// tie breaker for the loser tree winners. + round_robin_tie_breaker_mode: bool, + + /// Total number of polls returning the same value, as per partition. + /// We select the one that has less poll counts for tie-breaker in loser tree. + num_of_polled_with_same_value: Vec, + + /// To keep track of reset counts + poll_reset_epochs: Vec, + + /// Current reset count + current_reset_epoch: usize, + + /// Stores the previous value of each partitions for tracking the poll counts on the same value. + prev_cursors: Vec>>, + /// Optional number of rows to fetch fetch: Option, @@ -118,6 +152,7 @@ impl SortPreservingMergeStream { batch_size: usize, fetch: Option, reservation: MemoryReservation, + enable_round_robin_tie_breaker: bool, ) -> Self { let stream_count = streams.partitions(); @@ -127,12 +162,18 @@ impl SortPreservingMergeStream { metrics, aborted: false, cursors: (0..stream_count).map(|_| None).collect(), + prev_cursors: (0..stream_count).map(|_| None).collect(), + round_robin_tie_breaker_mode: false, + num_of_polled_with_same_value: vec![0; stream_count], + current_reset_epoch: 0, + poll_reset_epochs: vec![0; stream_count], loser_tree: vec![], loser_tree_adjusted: false, batch_size, fetch, produced: 0, uninitiated_partitions: (0..stream_count).collect(), + enable_round_robin_tie_breaker, } } @@ -218,7 +259,7 @@ impl SortPreservingMergeStream { } let stream_idx = self.loser_tree[0]; - if self.advance(stream_idx) { + if self.advance_cursors(stream_idx) { self.loser_tree_adjusted = false; self.in_progress.push_row(stream_idx); @@ -236,27 +277,53 @@ impl SortPreservingMergeStream { } } + /// For the given partition, updates the poll count. If the current value is the same + /// of the previous value, it increases the count by 1; otherwise, it is reset as 0. 
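+    ///
+    /// For illustration (ignoring the epoch-based resets): polling the values
+    /// `a, a, b, b` from one partition yields poll counts `0, 1, 0, 1`.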
+ fn update_poll_count_on_the_same_value(&mut self, partition_idx: usize) { + let cursor = &mut self.cursors[partition_idx]; + + // Check if the current partition's poll count is logically "reset" + if self.poll_reset_epochs[partition_idx] != self.current_reset_epoch { + self.poll_reset_epochs[partition_idx] = self.current_reset_epoch; + self.num_of_polled_with_same_value[partition_idx] = 0; + } + + if let Some(c) = cursor.as_mut() { + // Compare with the last row in the previous batch + let prev_cursor = &self.prev_cursors[partition_idx]; + if c.is_eq_to_prev_one(prev_cursor.as_ref()) { + self.num_of_polled_with_same_value[partition_idx] += 1; + } else { + self.num_of_polled_with_same_value[partition_idx] = 0; + } + } + } + fn fetch_reached(&mut self) -> bool { self.fetch .map(|fetch| self.produced + self.in_progress.len() >= fetch) .unwrap_or(false) } - fn advance(&mut self, stream_idx: usize) -> bool { - let slot = &mut self.cursors[stream_idx]; - match slot.as_mut() { - Some(c) => { - c.advance(); - if c.is_finished() { - *slot = None; - } - true + /// Advances the actual cursor. If it reaches its end, update the + /// previous cursor with it. + /// + /// If the given partition is not exhausted, the function returns `true`. + fn advance_cursors(&mut self, stream_idx: usize) -> bool { + if let Some(cursor) = &mut self.cursors[stream_idx] { + let _ = cursor.advance(); + if cursor.is_finished() { + // Take the current cursor, leaving `None` in its place + self.prev_cursors[stream_idx] = self.cursors[stream_idx].take(); } - None => false, + true + } else { + false } } - /// Returns `true` if the cursor at index `a` is greater than at index `b` + /// Returns `true` if the cursor at index `a` is greater than at index `b`. + /// In an equality case, it compares the partition indices given. #[inline] fn is_gt(&self, a: usize, b: usize) -> bool { match (&self.cursors[a], &self.cursors[b]) { @@ -266,6 +333,19 @@ impl SortPreservingMergeStream { } } + #[inline] + fn is_poll_count_gt(&self, a: usize, b: usize) -> bool { + let poll_a = self.num_of_polled_with_same_value[a]; + let poll_b = self.num_of_polled_with_same_value[b]; + poll_a.cmp(&poll_b).then_with(|| a.cmp(&b)).is_gt() + } + + #[inline] + fn update_winner(&mut self, cmp_node: usize, winner: &mut usize, challenger: usize) { + self.loser_tree[cmp_node] = *winner; + *winner = challenger; + } + /// Find the leaf node index in the loser tree for the given cursor index /// /// Note that this is not necessarily a leaf node in the tree, but it can @@ -327,16 +407,101 @@ impl SortPreservingMergeStream { self.loser_tree_adjusted = true; } - /// Attempts to update the loser tree, following winner replacement, if possible + /// Resets the poll count by incrementing the reset epoch. + fn reset_poll_counts(&mut self) { + self.current_reset_epoch += 1; + } + + /// Handles tie-breaking logic during the adjustment of the loser tree. + /// + /// When comparing elements from multiple partitions in the `update_loser_tree` process, a tie can occur + /// between the current winner and a challenger. This function is invoked when such a tie needs to be + /// resolved according to the round-robin tie-breaker mode. + /// + /// If round-robin tie-breaking is not active, it is enabled, and the poll counts for all elements are reset. + /// The function then compares the poll counts of the current winner and the challenger: + /// - If the winner remains at the top after the final comparison, it increments the winner's poll count. 
+ /// - If the challenger has a lower poll count than the current winner, the challenger becomes the new winner. + /// - If the poll counts are equal but the challenger's index is smaller, the challenger is preferred. + /// + /// # Parameters + /// - `cmp_node`: The index of the comparison node in the loser tree where the tie-breaking is happening. + /// - `winner`: A mutable reference to the current winner, which may be updated based on the tie-breaking result. + /// - `challenger`: The index of the challenger being compared against the winner. + /// + /// This function ensures fair selection among elements with equal values when tie-breaking mode is enabled, + /// aiming to balance the polling across different partitions. + #[inline] + fn handle_tie(&mut self, cmp_node: usize, winner: &mut usize, challenger: usize) { + if !self.round_robin_tie_breaker_mode { + self.round_robin_tie_breaker_mode = true; + // Reset poll count for tie-breaker + self.reset_poll_counts(); + } + // Update poll count if the winner survives in the final match + if *winner == self.loser_tree[0] { + self.update_poll_count_on_the_same_value(*winner); + if self.is_poll_count_gt(*winner, challenger) { + self.update_winner(cmp_node, winner, challenger); + } + } else if challenger < *winner { + // If the winner doesn’t survive in the final match, it indicates that the original winner + // has moved up in value, so the challenger now becomes the new winner. + // This also means that we’re in a new round of the tie breaker, + // and the polls count is outdated (though not yet cleaned up). + // + // By the time we reach this code, both the new winner and the current challenger + // have the same value, and neither has an updated polls count. + // Therefore, we simply select the one with the smaller index. + self.update_winner(cmp_node, winner, challenger); + } + } + + /// Updates the loser tree to reflect the new winner after the previous winner is consumed. + /// This function adjusts the tree by comparing the current winner with challengers from + /// other partitions. + /// + /// If `enable_round_robin_tie_breaker` is true and a tie occurs at the final level, the + /// tie-breaker logic will be applied to ensure fair selection among equal elements. fn update_loser_tree(&mut self) { + // Start with the current winner let mut winner = self.loser_tree[0]; - // Replace overall winner by walking tree of losers + + // Find the leaf node index of the winner in the loser tree. let mut cmp_node = self.lt_leaf_node_index(winner); + + // Traverse up the tree to adjust comparisons until reaching the root. 
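+        //
+        // Illustrative effect of the tie breaker: with several partitions all
+        // holding the same key, the plain `is_gt` comparison below would keep
+        // selecting the lowest partition index. With the tie breaker, the
+        // winner's poll count grows each time it is re-selected for the same
+        // value, so a tied challenger with a lower poll count takes over on a
+        // later adjustment, spreading polls across the tied partitions.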
while cmp_node != 0 { let challenger = self.loser_tree[cmp_node]; - if self.is_gt(winner, challenger) { - self.loser_tree[cmp_node] = winner; - winner = challenger; + // If round-robin tie-breaker is enabled and we're at the final comparison (cmp_node == 1) + if self.enable_round_robin_tie_breaker && cmp_node == 1 { + match (&self.cursors[winner], &self.cursors[challenger]) { + (Some(ac), Some(bc)) => { + let ord = ac.cmp(bc); + if ord.is_eq() { + self.handle_tie(cmp_node, &mut winner, challenger); + } else { + // Ends of tie breaker + self.round_robin_tie_breaker_mode = false; + if ord.is_gt() { + self.update_winner(cmp_node, &mut winner, challenger); + } + } + } + (None, _) => { + // Challenger wins, update winner + // Ends of tie breaker + self.round_robin_tie_breaker_mode = false; + self.update_winner(cmp_node, &mut winner, challenger); + } + (_, None) => { + // Winner wins again + // Ends of tie breaker + self.round_robin_tie_breaker_mode = false; + } + } + } else if self.is_gt(winner, challenger) { + self.update_winner(cmp_node, &mut winner, challenger); } cmp_node = self.lt_parent_node_index(cmp_node); } diff --git a/datafusion/physical-plan/src/sorts/sort_preserving_merge.rs b/datafusion/physical-plan/src/sorts/sort_preserving_merge.rs index 31a4ed61cf9e..f17161306c7a 100644 --- a/datafusion/physical-plan/src/sorts/sort_preserving_merge.rs +++ b/datafusion/physical-plan/src/sorts/sort_preserving_merge.rs @@ -82,6 +82,8 @@ pub struct SortPreservingMergeExec { fetch: Option, /// Cache holding plan properties like equivalences, output partitioning etc. cache: PlanProperties, + /// Configuration parameter to enable round-robin selection of tied winners of loser tree. + enable_round_robin_repartition: bool, } impl SortPreservingMergeExec { @@ -94,14 +96,25 @@ impl SortPreservingMergeExec { metrics: ExecutionPlanMetricsSet::new(), fetch: None, cache, + enable_round_robin_repartition: true, } } + /// Sets the number of rows to fetch pub fn with_fetch(mut self, fetch: Option) -> Self { self.fetch = fetch; self } + /// Sets the selection strategy of tied winners of the loser tree algorithm + pub fn with_round_robin_repartition( + mut self, + enable_round_robin_repartition: bool, + ) -> Self { + self.enable_round_robin_repartition = enable_round_robin_repartition; + self + } + /// Input schema pub fn input(&self) -> &Arc { &self.input @@ -182,6 +195,7 @@ impl ExecutionPlan for SortPreservingMergeExec { metrics: self.metrics.clone(), fetch: limit, cache: self.cache.clone(), + enable_round_robin_repartition: true, })) } @@ -281,6 +295,7 @@ impl ExecutionPlan for SortPreservingMergeExec { .with_batch_size(context.session_config().batch_size()) .with_fetch(self.fetch) .with_reservation(reservation) + .with_round_robin_tie_breaker(self.enable_round_robin_repartition) .build()?; debug!("Got stream result from SortPreservingMergeStream::new_from_receivers"); @@ -312,10 +327,12 @@ mod tests { use std::time::Duration; use super::*; + use crate::coalesce_batches::CoalesceBatchesExec; use crate::coalesce_partitions::CoalescePartitionsExec; use crate::expressions::col; use crate::memory::MemoryExec; use crate::metrics::{MetricValue, Timestamp}; + use crate::repartition::RepartitionExec; use crate::sorts::sort::SortExec; use crate::stream::RecordBatchReceiverStream; use crate::test::exec::{assert_strong_count_converges_to_zero, BlockingExec}; @@ -326,10 +343,12 @@ mod tests { use arrow::compute::SortOptions; use arrow::datatypes::{DataType, Field, Schema}; use arrow::record_batch::RecordBatch; + use 
arrow_array::Int64Array; use arrow_schema::SchemaRef; use datafusion_common::{assert_batches_eq, assert_contains, DataFusionError}; use datafusion_common_runtime::SpawnedTask; use datafusion_execution::config::SessionConfig; + use datafusion_execution::runtime_env::RuntimeEnvBuilder; use datafusion_execution::RecordBatchStream; use datafusion_physical_expr::expressions::Column; use datafusion_physical_expr::EquivalenceProperties; @@ -338,6 +357,80 @@ mod tests { use futures::{FutureExt, Stream, StreamExt}; use tokio::time::timeout; + // The number in the function is highly related to the memory limit we are testing + // any change of the constant should be aware of + fn generate_task_ctx_for_round_robin_tie_breaker() -> Result> { + let runtime = RuntimeEnvBuilder::new() + .with_memory_limit(20_000_000, 1.0) + .build_arc()?; + let config = SessionConfig::new(); + let task_ctx = TaskContext::default() + .with_runtime(runtime) + .with_session_config(config); + Ok(Arc::new(task_ctx)) + } + // The number in the function is highly related to the memory limit we are testing, + // any change of the constant should be aware of + fn generate_spm_for_round_robin_tie_breaker( + enable_round_robin_repartition: bool, + ) -> Result> { + let target_batch_size = 12500; + let row_size = 12500; + let a: ArrayRef = Arc::new(Int32Array::from(vec![1; row_size])); + let b: ArrayRef = Arc::new(StringArray::from_iter(vec![Some("a"); row_size])); + let c: ArrayRef = Arc::new(Int64Array::from_iter(vec![0; row_size])); + let rb = RecordBatch::try_from_iter(vec![("a", a), ("b", b), ("c", c)]).unwrap(); + + let rbs = (0..1024).map(|_| rb.clone()).collect::>(); + + let schema = rb.schema(); + let sort = vec![ + PhysicalSortExpr { + expr: col("b", &schema).unwrap(), + options: Default::default(), + }, + PhysicalSortExpr { + expr: col("c", &schema).unwrap(), + options: Default::default(), + }, + ]; + + let exec = MemoryExec::try_new(&[rbs], schema, None).unwrap(); + let repartition_exec = + RepartitionExec::try_new(Arc::new(exec), Partitioning::RoundRobinBatch(2))?; + let coalesce_batches_exec = + CoalesceBatchesExec::new(Arc::new(repartition_exec), target_batch_size); + let spm = SortPreservingMergeExec::new(sort, Arc::new(coalesce_batches_exec)) + .with_round_robin_repartition(enable_round_robin_repartition); + Ok(Arc::new(spm)) + } + + /// This test verifies that memory usage stays within limits when the tie breaker is enabled. + /// Any errors here could indicate unintended changes in tie breaker logic. + /// + /// Note: If you adjust constants in this test, ensure that memory usage differs + /// based on whether the tie breaker is enabled or disabled. + #[tokio::test(flavor = "multi_thread")] + async fn test_round_robin_tie_breaker_success() -> Result<()> { + let task_ctx = generate_task_ctx_for_round_robin_tie_breaker()?; + let spm = generate_spm_for_round_robin_tie_breaker(true)?; + let _collected = collect(spm, task_ctx).await.unwrap(); + Ok(()) + } + + /// This test verifies that memory usage stays within limits when the tie breaker is enabled. + /// Any errors here could indicate unintended changes in tie breaker logic. + /// + /// Note: If you adjust constants in this test, ensure that memory usage differs + /// based on whether the tie breaker is enabled or disabled. 
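/// In this variant the tie breaker is disabled, so the merge is expected to exceed
/// the configured memory limit and `collect` should return an error.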
+ #[tokio::test(flavor = "multi_thread")] + async fn test_round_robin_tie_breaker_fail() -> Result<()> { + let task_ctx = generate_task_ctx_for_round_robin_tie_breaker()?; + let spm = generate_spm_for_round_robin_tie_breaker(false)?; + let _err = collect(spm, task_ctx).await.unwrap_err(); + Ok(()) + } + #[tokio::test] async fn test_merge_interleave() { let task_ctx = Arc::new(TaskContext::default()); diff --git a/datafusion/physical-plan/src/sorts/streaming_merge.rs b/datafusion/physical-plan/src/sorts/streaming_merge.rs index ad640d8e8470..e8330a7cabc0 100644 --- a/datafusion/physical-plan/src/sorts/streaming_merge.rs +++ b/datafusion/physical-plan/src/sorts/streaming_merge.rs @@ -36,7 +36,7 @@ macro_rules! primitive_merge_helper { } macro_rules! merge_helper { - ($t:ty, $sort:ident, $streams:ident, $schema:ident, $tracking_metrics:ident, $batch_size:ident, $fetch:ident, $reservation:ident) => {{ + ($t:ty, $sort:ident, $streams:ident, $schema:ident, $tracking_metrics:ident, $batch_size:ident, $fetch:ident, $reservation:ident, $enable_round_robin_tie_breaker:ident) => {{ let streams = FieldCursorStream::<$t>::new($sort, $streams); return Ok(Box::pin(SortPreservingMergeStream::new( Box::new(streams), @@ -45,6 +45,7 @@ macro_rules! merge_helper { $batch_size, $fetch, $reservation, + $enable_round_robin_tie_breaker, ))); }}; } @@ -58,11 +59,15 @@ pub struct StreamingMergeBuilder<'a> { batch_size: Option, fetch: Option, reservation: Option, + enable_round_robin_tie_breaker: bool, } impl<'a> StreamingMergeBuilder<'a> { pub fn new() -> Self { - Self::default() + Self { + enable_round_robin_tie_breaker: true, + ..Default::default() + } } pub fn with_streams(mut self, streams: Vec) -> Self { @@ -100,6 +105,14 @@ impl<'a> StreamingMergeBuilder<'a> { self } + pub fn with_round_robin_tie_breaker( + mut self, + enable_round_robin_tie_breaker: bool, + ) -> Self { + self.enable_round_robin_tie_breaker = enable_round_robin_tie_breaker; + self + } + pub fn build(self) -> Result { let Self { streams, @@ -109,6 +122,7 @@ impl<'a> StreamingMergeBuilder<'a> { reservation, fetch, expressions, + enable_round_robin_tie_breaker, } = self; // Early return if streams or expressions are empty @@ -141,11 +155,11 @@ impl<'a> StreamingMergeBuilder<'a> { let sort = expressions[0].clone(); let data_type = sort.expr.data_type(schema.as_ref())?; downcast_primitive! 
{ - data_type => (primitive_merge_helper, sort, streams, schema, metrics, batch_size, fetch, reservation), - DataType::Utf8 => merge_helper!(StringArray, sort, streams, schema, metrics, batch_size, fetch, reservation) - DataType::LargeUtf8 => merge_helper!(LargeStringArray, sort, streams, schema, metrics, batch_size, fetch, reservation) - DataType::Binary => merge_helper!(BinaryArray, sort, streams, schema, metrics, batch_size, fetch, reservation) - DataType::LargeBinary => merge_helper!(LargeBinaryArray, sort, streams, schema, metrics, batch_size, fetch, reservation) + data_type => (primitive_merge_helper, sort, streams, schema, metrics, batch_size, fetch, reservation, enable_round_robin_tie_breaker), + DataType::Utf8 => merge_helper!(StringArray, sort, streams, schema, metrics, batch_size, fetch, reservation, enable_round_robin_tie_breaker) + DataType::LargeUtf8 => merge_helper!(LargeStringArray, sort, streams, schema, metrics, batch_size, fetch, reservation, enable_round_robin_tie_breaker) + DataType::Binary => merge_helper!(BinaryArray, sort, streams, schema, metrics, batch_size, fetch, reservation, enable_round_robin_tie_breaker) + DataType::LargeBinary => merge_helper!(LargeBinaryArray, sort, streams, schema, metrics, batch_size, fetch, reservation, enable_round_robin_tie_breaker) _ => {} } } @@ -163,6 +177,7 @@ impl<'a> StreamingMergeBuilder<'a> { batch_size, fetch, reservation, + enable_round_robin_tie_breaker, ))) } } From 2f745b7ddf7c85c2896106a8281a8309d3286681 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Thu, 31 Oct 2024 10:03:59 -0400 Subject: [PATCH 13/32] Prepare for 42.2.0 release (#13191) (#13193) * chore: Update version to 42.2.0 * Update changelog * update docs --- Cargo.toml | 50 +++++------ datafusion-cli/Cargo.lock | 141 ++++++++++++++++-------------- datafusion-cli/Cargo.toml | 4 +- dev/changelog/42.2.0.md | 37 ++++++++ docs/source/user-guide/configs.md | 2 +- 5 files changed, 139 insertions(+), 95 deletions(-) create mode 100644 dev/changelog/42.2.0.md diff --git a/Cargo.toml b/Cargo.toml index 2f8896f7d90c..21079c484ce0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -60,7 +60,7 @@ license = "Apache-2.0" readme = "README.md" repository = "https://github.com/apache/datafusion" rust-version = "1.79" -version = "42.1.0" +version = "42.2.0" [workspace.dependencies] # We turn off default-features for some dependencies here so the workspaces which inherit them can @@ -93,30 +93,30 @@ bytes = "1.4" chrono = { version = "0.4.38", default-features = false } ctor = "0.2.0" dashmap = "6.0.1" -datafusion = { path = "datafusion/core", version = "42.1.0", default-features = false } -datafusion-catalog = { path = "datafusion/catalog", version = "42.1.0" } -datafusion-common = { path = "datafusion/common", version = "42.1.0", default-features = false } -datafusion-common-runtime = { path = "datafusion/common-runtime", version = "42.1.0" } -datafusion-execution = { path = "datafusion/execution", version = "42.1.0" } -datafusion-expr = { path = "datafusion/expr", version = "42.1.0" } -datafusion-expr-common = { path = "datafusion/expr-common", version = "42.1.0" } -datafusion-ffi = { path = "datafusion/ffi", version = "42.1.0" } -datafusion-functions = { path = "datafusion/functions", version = "42.1.0" } -datafusion-functions-aggregate = { path = "datafusion/functions-aggregate", version = "42.1.0" } -datafusion-functions-aggregate-common = { path = "datafusion/functions-aggregate-common", version = "42.1.0" } -datafusion-functions-nested = { path = "datafusion/functions-nested", 
version = "42.1.0" } -datafusion-functions-window = { path = "datafusion/functions-window", version = "42.1.0" } -datafusion-functions-window-common = { path = "datafusion/functions-window-common", version = "42.1.0" } -datafusion-optimizer = { path = "datafusion/optimizer", version = "42.1.0", default-features = false } -datafusion-physical-expr = { path = "datafusion/physical-expr", version = "42.1.0", default-features = false } -datafusion-physical-expr-common = { path = "datafusion/physical-expr-common", version = "42.1.0", default-features = false } -datafusion-physical-optimizer = { path = "datafusion/physical-optimizer", version = "42.1.0" } -datafusion-physical-plan = { path = "datafusion/physical-plan", version = "42.1.0" } -datafusion-proto = { path = "datafusion/proto", version = "42.1.0" } -datafusion-proto-common = { path = "datafusion/proto-common", version = "42.1.0" } -datafusion-sql = { path = "datafusion/sql", version = "42.1.0" } -datafusion-sqllogictest = { path = "datafusion/sqllogictest", version = "42.1.0" } -datafusion-substrait = { path = "datafusion/substrait", version = "42.1.0" } +datafusion = { path = "datafusion/core", version = "42.2.0", default-features = false } +datafusion-catalog = { path = "datafusion/catalog", version = "42.2.0" } +datafusion-common = { path = "datafusion/common", version = "42.2.0", default-features = false } +datafusion-common-runtime = { path = "datafusion/common-runtime", version = "42.2.0" } +datafusion-execution = { path = "datafusion/execution", version = "42.2.0" } +datafusion-expr = { path = "datafusion/expr", version = "42.2.0" } +datafusion-expr-common = { path = "datafusion/expr-common", version = "42.2.0" } +datafusion-ffi = { path = "datafusion/ffi", version = "42.2.0" } +datafusion-functions = { path = "datafusion/functions", version = "42.2.0" } +datafusion-functions-aggregate = { path = "datafusion/functions-aggregate", version = "42.2.0" } +datafusion-functions-aggregate-common = { path = "datafusion/functions-aggregate-common", version = "42.2.0" } +datafusion-functions-nested = { path = "datafusion/functions-nested", version = "42.2.0" } +datafusion-functions-window = { path = "datafusion/functions-window", version = "42.2.0" } +datafusion-functions-window-common = { path = "datafusion/functions-window-common", version = "42.2.0" } +datafusion-optimizer = { path = "datafusion/optimizer", version = "42.2.0", default-features = false } +datafusion-physical-expr = { path = "datafusion/physical-expr", version = "42.2.0", default-features = false } +datafusion-physical-expr-common = { path = "datafusion/physical-expr-common", version = "42.2.0", default-features = false } +datafusion-physical-optimizer = { path = "datafusion/physical-optimizer", version = "42.2.0" } +datafusion-physical-plan = { path = "datafusion/physical-plan", version = "42.2.0" } +datafusion-proto = { path = "datafusion/proto", version = "42.2.0" } +datafusion-proto-common = { path = "datafusion/proto-common", version = "42.2.0" } +datafusion-sql = { path = "datafusion/sql", version = "42.2.0" } +datafusion-sqllogictest = { path = "datafusion/sqllogictest", version = "42.2.0" } +datafusion-substrait = { path = "datafusion/substrait", version = "42.2.0" } doc-comment = "0.3" env_logger = "0.11" futures = "0.3" diff --git a/datafusion-cli/Cargo.lock b/datafusion-cli/Cargo.lock index ca67e3e4f531..541d464d381f 100644 --- a/datafusion-cli/Cargo.lock +++ b/datafusion-cli/Cargo.lock @@ -84,9 +84,9 @@ dependencies = [ [[package]] name = "anstream" -version = 
"0.6.16" +version = "0.6.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f581a3637024bb8f62027f3ab6151f502090388c1dad05b01c70fb733b33c20" +checksum = "23a1e53f0f5d86382dafe1cf314783b2044280f406e7e1506368220ad11b1338" dependencies = [ "anstyle", "anstyle-parse", @@ -99,33 +99,33 @@ dependencies = [ [[package]] name = "anstyle" -version = "1.0.8" +version = "1.0.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1bec1de6f59aedf83baf9ff929c98f2ad654b97c9510f4e70cf6f661d49fd5b1" +checksum = "8365de52b16c035ff4fcafe0092ba9390540e3e352870ac09933bebcaa2c8c56" [[package]] name = "anstyle-parse" -version = "0.2.5" +version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eb47de1e80c2b463c735db5b217a0ddc39d612e7ac9e2e96a5aed1f57616c1cb" +checksum = "3b2d16507662817a6a20a9ea92df6652ee4f94f914589377d69f3b21bc5798a9" dependencies = [ "utf8parse", ] [[package]] name = "anstyle-query" -version = "1.1.1" +version = "1.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6d36fc52c7f6c869915e99412912f22093507da8d9e942ceaf66fe4b7c14422a" +checksum = "79947af37f4177cfead1110013d678905c37501914fba0efea834c3fe9a8d60c" dependencies = [ - "windows-sys 0.52.0", + "windows-sys 0.59.0", ] [[package]] name = "anstyle-wincon" -version = "3.0.5" +version = "3.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "abbf7eaf69f3b46121caf74645dd5d3078b4b205a2513930da0033156682cd28" +checksum = "2109dbce0e72be3ec00bed26e6a7479ca384ad226efdd66db8fa2e3a38c83125" dependencies = [ "anstyle", "windows-sys 0.59.0", @@ -456,9 +456,9 @@ checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26" [[package]] name = "aws-config" -version = "1.5.8" +version = "1.5.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7198e6f03240fdceba36656d8be440297b6b82270325908c7381f37d826a74f6" +checksum = "2d6448cfb224dd6a9b9ac734f58622dd0d4751f3589f3b777345745f46b2eb14" dependencies = [ "aws-credential-types", "aws-runtime", @@ -523,9 +523,9 @@ dependencies = [ [[package]] name = "aws-sdk-sso" -version = "1.46.0" +version = "1.47.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0dc2faec3205d496c7e57eff685dd944203df7ce16a4116d0281c44021788a7b" +checksum = "a8776850becacbd3a82a4737a9375ddb5c6832a51379f24443a98e61513f852c" dependencies = [ "aws-credential-types", "aws-runtime", @@ -545,9 +545,9 @@ dependencies = [ [[package]] name = "aws-sdk-ssooidc" -version = "1.47.0" +version = "1.48.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c93c241f52bc5e0476e259c953234dab7e2a35ee207ee202e86c0095ec4951dc" +checksum = "0007b5b8004547133319b6c4e87193eee2a0bcb3e4c18c75d09febe9dab7b383" dependencies = [ "aws-credential-types", "aws-runtime", @@ -567,9 +567,9 @@ dependencies = [ [[package]] name = "aws-sdk-sts" -version = "1.46.0" +version = "1.47.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b259429be94a3459fa1b00c5684faee118d74f9577cc50aebadc36e507c63b5f" +checksum = "9fffaa356e7f1c725908b75136d53207fa714e348f365671df14e95a60530ad3" dependencies = [ "aws-credential-types", "aws-runtime", @@ -590,9 +590,9 @@ dependencies = [ [[package]] name = "aws-sigv4" -version = "1.2.4" +version = "1.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cc8db6904450bafe7473c6ca9123f88cc11089e41a025408f992db4e22d3be68" +checksum = 
"5619742a0d8f253be760bfbb8e8e8368c69e3587e4637af5754e488a611499b1" dependencies = [ "aws-credential-types", "aws-smithy-http", @@ -938,6 +938,12 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fd16c4719339c4530435d38e511904438d07cce7950afa3718a84ac36c10e89e" +[[package]] +name = "cfg_aliases" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" + [[package]] name = "chrono" version = "0.4.38" @@ -1023,9 +1029,9 @@ dependencies = [ [[package]] name = "colorchoice" -version = "1.0.2" +version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d3fd119d74b830634cea2a0f58bbd0d54540518a14397557951e79340abc28c0" +checksum = "5b63caa9aa9397e2d9480a9b13673856c78d8ac123288526c37d7839f2a86990" [[package]] name = "comfy-table" @@ -1182,7 +1188,7 @@ dependencies = [ [[package]] name = "datafusion" -version = "42.1.0" +version = "42.2.0" dependencies = [ "ahash", "apache-avro", @@ -1239,7 +1245,7 @@ dependencies = [ [[package]] name = "datafusion-catalog" -version = "42.1.0" +version = "42.2.0" dependencies = [ "arrow-schema", "async-trait", @@ -1252,7 +1258,7 @@ dependencies = [ [[package]] name = "datafusion-cli" -version = "42.1.0" +version = "42.2.0" dependencies = [ "arrow", "assert_cmd", @@ -1282,7 +1288,7 @@ dependencies = [ [[package]] name = "datafusion-common" -version = "42.1.0" +version = "42.2.0" dependencies = [ "ahash", "apache-avro", @@ -1306,7 +1312,7 @@ dependencies = [ [[package]] name = "datafusion-common-runtime" -version = "42.1.0" +version = "42.2.0" dependencies = [ "log", "tokio", @@ -1314,7 +1320,7 @@ dependencies = [ [[package]] name = "datafusion-execution" -version = "42.1.0" +version = "42.2.0" dependencies = [ "arrow", "chrono", @@ -1333,7 +1339,7 @@ dependencies = [ [[package]] name = "datafusion-expr" -version = "42.1.0" +version = "42.2.0" dependencies = [ "ahash", "arrow", @@ -1355,7 +1361,7 @@ dependencies = [ [[package]] name = "datafusion-expr-common" -version = "42.1.0" +version = "42.2.0" dependencies = [ "arrow", "datafusion-common", @@ -1365,7 +1371,7 @@ dependencies = [ [[package]] name = "datafusion-functions" -version = "42.1.0" +version = "42.2.0" dependencies = [ "arrow", "arrow-buffer", @@ -1390,7 +1396,7 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate" -version = "42.1.0" +version = "42.2.0" dependencies = [ "ahash", "arrow", @@ -1409,7 +1415,7 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate-common" -version = "42.1.0" +version = "42.2.0" dependencies = [ "ahash", "arrow", @@ -1421,7 +1427,7 @@ dependencies = [ [[package]] name = "datafusion-functions-nested" -version = "42.1.0" +version = "42.2.0" dependencies = [ "arrow", "arrow-array", @@ -1442,7 +1448,7 @@ dependencies = [ [[package]] name = "datafusion-functions-window" -version = "42.1.0" +version = "42.2.0" dependencies = [ "datafusion-common", "datafusion-expr", @@ -1455,7 +1461,7 @@ dependencies = [ [[package]] name = "datafusion-functions-window-common" -version = "42.1.0" +version = "42.2.0" dependencies = [ "datafusion-common", "datafusion-physical-expr-common", @@ -1463,7 +1469,7 @@ dependencies = [ [[package]] name = "datafusion-optimizer" -version = "42.1.0" +version = "42.2.0" dependencies = [ "arrow", "async-trait", @@ -1481,7 +1487,7 @@ dependencies = [ [[package]] name = "datafusion-physical-expr" -version = "42.1.0" +version = "42.2.0" 
dependencies = [ "ahash", "arrow", @@ -1507,7 +1513,7 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-common" -version = "42.1.0" +version = "42.2.0" dependencies = [ "ahash", "arrow", @@ -1519,7 +1525,7 @@ dependencies = [ [[package]] name = "datafusion-physical-optimizer" -version = "42.1.0" +version = "42.2.0" dependencies = [ "arrow", "arrow-schema", @@ -1533,7 +1539,7 @@ dependencies = [ [[package]] name = "datafusion-physical-plan" -version = "42.1.0" +version = "42.2.0" dependencies = [ "ahash", "arrow", @@ -1566,7 +1572,7 @@ dependencies = [ [[package]] name = "datafusion-sql" -version = "42.1.0" +version = "42.2.0" dependencies = [ "arrow", "arrow-array", @@ -2136,7 +2142,7 @@ dependencies = [ "http 1.1.0", "hyper 1.5.0", "hyper-util", - "rustls 0.23.15", + "rustls 0.23.16", "rustls-native-certs 0.8.0", "rustls-pki-types", "tokio", @@ -2146,9 +2152,9 @@ dependencies = [ [[package]] name = "hyper-util" -version = "0.1.9" +version = "0.1.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "41296eb09f183ac68eec06e03cdbea2e759633d4067b2f6552fc2e009bcad08b" +checksum = "df2dcfbe0677734ab2f3ffa7fa7bfd4706bfdc1ef393f2ee30184aed67e631b4" dependencies = [ "bytes", "futures-channel", @@ -2371,9 +2377,9 @@ dependencies = [ [[package]] name = "libm" -version = "0.2.8" +version = "0.2.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4ec2a862134d2a7d32d7983ddcdd1c4923530833c9f2ea1a44fc5fa473989058" +checksum = "8355be11b20d696c8f18f6cc018c4e372165b1fa8126cef092399c9951984ffa" [[package]] name = "libmimalloc-sys" @@ -2506,7 +2512,7 @@ checksum = "ab2156c4fce2f8df6c499cc1c763e4394b7482525bf2a9701c9d79d215f519e4" dependencies = [ "bitflags 2.6.0", "cfg-if", - "cfg_aliases", + "cfg_aliases 0.1.1", "libc", ] @@ -2915,7 +2921,7 @@ dependencies = [ "quinn-proto", "quinn-udp", "rustc-hash", - "rustls 0.23.15", + "rustls 0.23.16", "socket2", "thiserror", "tokio", @@ -2932,7 +2938,7 @@ dependencies = [ "rand", "ring", "rustc-hash", - "rustls 0.23.15", + "rustls 0.23.16", "slab", "thiserror", "tinyvec", @@ -2941,10 +2947,11 @@ dependencies = [ [[package]] name = "quinn-udp" -version = "0.5.5" +version = "0.5.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4fe68c2e9e1a1234e218683dbdf9f9dfcb094113c5ac2b938dfcb9bab4c4140b" +checksum = "e346e016eacfff12233c243718197ca12f148c84e1e84268a896699b41c71780" dependencies = [ + "cfg_aliases 0.2.1", "libc", "once_cell", "socket2", @@ -3064,9 +3071,9 @@ checksum = "ba39f3699c378cd8970968dcbff9c43159ea4cfbd88d43c00b22f2ef10a435d2" [[package]] name = "reqwest" -version = "0.12.8" +version = "0.12.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f713147fbe92361e52392c73b8c9e48c04c6625bce969ef54dc901e58e042a7b" +checksum = "a77c62af46e79de0a562e1a9849205ffcb7fc1238876e9bd743357570e04046f" dependencies = [ "base64 0.22.1", "bytes", @@ -3087,7 +3094,7 @@ dependencies = [ "percent-encoding", "pin-project-lite", "quinn", - "rustls 0.23.15", + "rustls 0.23.16", "rustls-native-certs 0.8.0", "rustls-pemfile 2.2.0", "rustls-pki-types", @@ -3181,9 +3188,9 @@ dependencies = [ [[package]] name = "rustix" -version = "0.38.37" +version = "0.38.38" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8acb788b847c24f28525660c4d7758620a7210875711f79e7f663cc152726811" +checksum = "aa260229e6538e52293eeb577aabd09945a09d6d9cc0fc550ed7529056c2e32a" dependencies = [ "bitflags 2.6.0", "errno", @@ -3206,9 +3213,9 @@ dependencies 
= [ [[package]] name = "rustls" -version = "0.23.15" +version = "0.23.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5fbb44d7acc4e873d613422379f69f237a1b141928c02f6bc6ccfddddc2d7993" +checksum = "eee87ff5d9b36712a58574e12e9f0ea80f915a5b0ac518d322b24a465617925e" dependencies = [ "once_cell", "ring", @@ -3393,18 +3400,18 @@ checksum = "a3f0bf26fd526d2a95683cd0f87bf103b8539e2ca1ef48ce002d67aad59aa0b4" [[package]] name = "serde" -version = "1.0.213" +version = "1.0.214" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3ea7893ff5e2466df8d720bb615088341b295f849602c6956047f8f80f0e9bc1" +checksum = "f55c3193aca71c12ad7890f1785d2b73e1b9f63a0bbc353c08ef26fe03fc56b5" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.213" +version = "1.0.214" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7e85ad2009c50b58e87caa8cd6dac16bdf511bbfb7af6c33df902396aa480fa5" +checksum = "de523f781f095e28fa605cdce0f8307e451cc0fd14e2eb4cd2e98a355b147766" dependencies = [ "proc-macro2", "quote", @@ -3774,7 +3781,7 @@ version = "0.26.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0c7bc40d0e5a97695bb96e27995cd3a08538541b0a846f65bba7a359f36700d4" dependencies = [ - "rustls 0.23.15", + "rustls 0.23.16", "rustls-pki-types", "tokio", ] @@ -4075,9 +4082,9 @@ checksum = "65fc09f10666a9f147042251e0dda9c18f166ff7de300607007e96bdebc1068d" [[package]] name = "wasm-streams" -version = "0.4.1" +version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4e072d4e72f700fb3443d8fe94a39315df013eef1104903cdb0a2abd322bbecd" +checksum = "15053d8d85c7eccdbefef60f06769760a563c7f0a9d6902a13d35c7800b0ad65" dependencies = [ "futures-util", "js-sys", diff --git a/datafusion-cli/Cargo.toml b/datafusion-cli/Cargo.toml index 8e4352612889..049f87f08e69 100644 --- a/datafusion-cli/Cargo.toml +++ b/datafusion-cli/Cargo.toml @@ -18,7 +18,7 @@ [package] name = "datafusion-cli" description = "Command Line Client for DataFusion query engine." -version = "42.1.0" +version = "42.2.0" authors = ["Apache DataFusion "] edition = "2021" keywords = ["arrow", "datafusion", "query", "sql"] @@ -39,7 +39,7 @@ aws-sdk-sts = "1.43.0" # end pin aws-sdk crates aws-credential-types = "1.2.0" clap = { version = "4.5.16", features = ["derive", "cargo"] } -datafusion = { path = "../datafusion/core", version = "42.1.0", features = [ +datafusion = { path = "../datafusion/core", version = "42.2.0", features = [ "avro", "crypto_expressions", "datetime_expressions", diff --git a/dev/changelog/42.2.0.md b/dev/changelog/42.2.0.md new file mode 100644 index 000000000000..6c907162c65e --- /dev/null +++ b/dev/changelog/42.2.0.md @@ -0,0 +1,37 @@ + + +# Apache DataFusion 42.2.0 Changelog + +This release consists of 1 commits from 1 contributor. See credits at the end of this changelog for more information. + +**Other:** + +- +- Backport config option `skip_physical_aggregate_schema_check` #13176 to 42 [#13189](https://github.com/apache/datafusion/pull/13189) (alamb) + +## Credits + +Thank you to everyone who contributed to this release. Here is a breakdown of commits (PRs merged) per contributor. + +``` + 1 Andrew Lamb +``` + +Thank you also to everyone who contributed in other ways such as filing issues, reviewing PRs, and providing feedback on this release. 
diff --git a/docs/source/user-guide/configs.md b/docs/source/user-guide/configs.md index 015d35cf2455..bd8591b5d723 100644 --- a/docs/source/user-guide/configs.md +++ b/docs/source/user-guide/configs.md @@ -67,7 +67,7 @@ Environment variables are read during `SessionConfig` initialisation so they mus | datafusion.execution.parquet.statistics_enabled | page | (writing) Sets if statistics are enabled for any column Valid values are: "none", "chunk", and "page" These values are not case sensitive. If NULL, uses default parquet writer setting | | datafusion.execution.parquet.max_statistics_size | 4096 | (writing) Sets max statistics size for any column. If NULL, uses default parquet writer setting | | datafusion.execution.parquet.max_row_group_size | 1048576 | (writing) Target maximum number of rows in each row group (defaults to 1M rows). Writing larger row groups requires more memory to write, but can get better compression and be faster to read. | -| datafusion.execution.parquet.created_by | datafusion version 42.1.0 | (writing) Sets "created by" property | +| datafusion.execution.parquet.created_by | datafusion version 42.2.0 | (writing) Sets "created by" property | | datafusion.execution.parquet.column_index_truncate_length | 64 | (writing) Sets column index truncate length | | datafusion.execution.parquet.data_page_row_count_limit | 20000 | (writing) Sets best effort maximum number of rows in data page | | datafusion.execution.parquet.encoding | NULL | (writing) Sets default encoding for any column. Valid values are: plain, plain_dictionary, rle, bit_packed, delta_binary_packed, delta_length_byte_array, delta_byte_array, rle_dictionary, and byte_stream_split. These values are not case sensitive. If NULL, uses default parquet writer setting | From cf76421d7f3f30a42f0bad473d3ebdc611b83ba3 Mon Sep 17 00:00:00 2001 From: Piotr Findeisen Date: Thu, 31 Oct 2024 16:15:26 +0100 Subject: [PATCH 14/32] Fix rendering of dictionary empty string values in SLT tests (#13198) --- .../src/engines/datafusion_engine/normalize.rs | 7 ++++++- .../sqllogictest/test_files/string/dictionary_utf8.slt | 5 +++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/datafusion/sqllogictest/src/engines/datafusion_engine/normalize.rs b/datafusion/sqllogictest/src/engines/datafusion_engine/normalize.rs index a88ee9d08df0..8337d2e9a39c 100644 --- a/datafusion/sqllogictest/src/engines/datafusion_engine/normalize.rs +++ b/datafusion/sqllogictest/src/engines/datafusion_engine/normalize.rs @@ -16,7 +16,7 @@ // under the License. use crate::engines::output::DFColumnType; -use arrow::array::Array; +use arrow::array::{Array, AsArray}; use arrow::datatypes::Fields; use arrow::util::display::ArrayFormatter; use arrow::{array, array::ArrayRef, datatypes::DataType, record_batch::RecordBatch}; @@ -238,6 +238,11 @@ pub fn cell_to_string(col: &ArrayRef, row: usize) -> Result { col, row ))), + DataType::Dictionary(_, _) => { + let dict = col.as_any_dictionary(); + let key = dict.normalized_keys()[row]; + Ok(cell_to_string(dict.values(), key)?) 
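// `normalized_keys` gives, for each row, the index of its value in the dictionary's
// values array, so recursing with that index formats the underlying value the same
// way as a plain (non-dictionary) column of the same type.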
+ } _ => { let f = ArrayFormatter::try_new(col.as_ref(), &DEFAULT_FORMAT_OPTIONS); Ok(f.unwrap().value(row).to_string()) diff --git a/datafusion/sqllogictest/test_files/string/dictionary_utf8.slt b/datafusion/sqllogictest/test_files/string/dictionary_utf8.slt index 04bd00e316e8..8a2916f669d0 100644 --- a/datafusion/sqllogictest/test_files/string/dictionary_utf8.slt +++ b/datafusion/sqllogictest/test_files/string/dictionary_utf8.slt @@ -37,6 +37,11 @@ select arrow_cast(col1, 'Dictionary(Int32, Utf8)') as c1 from test_substr_base; statement ok drop table test_source +query T +SELECT arrow_cast('', 'Dictionary(Int32, Utf8)'); +---- +(empty) + # TODO: move it back to `string_query.slt.part` after fixing the issue # see detail: https://github.com/apache/datafusion/issues/12637 # Test pattern with wildcard characters From 68c042d13387a9128608b838065023c18c8e61a4 Mon Sep 17 00:00:00 2001 From: Michael J Ward Date: Thu, 31 Oct 2024 10:17:36 -0500 Subject: [PATCH 15/32] fix: default UDWFImpl::expressions returns all expressions (#13169) * fix: default UDWFImpl::expressions returns all expressions * Add unit test to check for window function inputs * Add unit test to catch errors in udwf with multiple column arguments * remove unnecessary qualification from user defined window test * cargo fmt --------- Co-authored-by: Tim Saucer --- .../user_defined_window_functions.rs | 129 +++++++++++++++++- datafusion/expr/src/udwf.rs | 5 +- 2 files changed, 127 insertions(+), 7 deletions(-) diff --git a/datafusion/core/tests/user_defined/user_defined_window_functions.rs b/datafusion/core/tests/user_defined/user_defined_window_functions.rs index 8fe028eedd44..10ee0c5cd2dc 100644 --- a/datafusion/core/tests/user_defined/user_defined_window_functions.rs +++ b/datafusion/core/tests/user_defined/user_defined_window_functions.rs @@ -29,14 +29,20 @@ use std::{ use arrow::array::AsArray; use arrow_array::{ArrayRef, Int64Array, RecordBatch, StringArray}; -use arrow_schema::{DataType, Field}; +use arrow_schema::{DataType, Field, Schema}; use datafusion::{assert_batches_eq, prelude::SessionContext}; use datafusion_common::{Result, ScalarValue}; use datafusion_expr::{ - PartitionEvaluator, Signature, Volatility, WindowUDF, WindowUDFImpl, + PartitionEvaluator, Signature, TypeSignature, Volatility, WindowUDF, WindowUDFImpl, }; -use datafusion_functions_window_common::field::WindowUDFFieldArgs; use datafusion_functions_window_common::partition::PartitionEvaluatorArgs; +use datafusion_functions_window_common::{ + expr::ExpressionArgs, field::WindowUDFFieldArgs, +}; +use datafusion_physical_expr::{ + expressions::{col, lit}, + PhysicalExpr, +}; /// A query with a window function evaluated over the entire partition const UNBOUNDED_WINDOW_QUERY: &str = "SELECT x, y, val, \ @@ -641,3 +647,120 @@ fn odd_count_arr(arr: &Int64Array, num_rows: usize) -> ArrayRef { let array: Int64Array = std::iter::repeat(odd_count(arr)).take(num_rows).collect(); Arc::new(array) } + +#[derive(Debug)] +struct VariadicWindowUDF { + signature: Signature, +} + +impl VariadicWindowUDF { + fn new() -> Self { + Self { + signature: Signature::one_of( + vec![ + TypeSignature::Any(0), + TypeSignature::Any(1), + TypeSignature::Any(2), + TypeSignature::Any(3), + ], + Volatility::Immutable, + ), + } + } +} + +impl WindowUDFImpl for VariadicWindowUDF { + fn as_any(&self) -> &dyn Any { + self + } + + fn name(&self) -> &str { + "variadic_window_udf" + } + + fn signature(&self) -> &Signature { + &self.signature + } + + fn partition_evaluator( + &self, + _: 
PartitionEvaluatorArgs, + ) -> Result> { + unimplemented!("unnecessary for testing"); + } + + fn field(&self, _: WindowUDFFieldArgs) -> Result { + unimplemented!("unnecessary for testing"); + } +} + +#[test] +// Fixes: default implementation of `WindowUDFImpl::expressions` +// returns all input expressions to the user-defined window +// function unmodified. +// +// See: https://github.com/apache/datafusion/pull/13169 +fn test_default_expressions() -> Result<()> { + let udwf = WindowUDF::from(VariadicWindowUDF::new()); + + let field_a = Field::new("a", DataType::Int32, false); + let field_b = Field::new("b", DataType::Float32, false); + let field_c = Field::new("c", DataType::Boolean, false); + let schema = Schema::new(vec![field_a, field_b, field_c]); + + let test_cases = vec![ + // + // Zero arguments + // + vec![], + // + // Single argument + // + vec![col("a", &schema)?], + vec![lit(1)], + // + // Two arguments + // + vec![col("a", &schema)?, col("b", &schema)?], + vec![col("a", &schema)?, lit(2)], + vec![lit(false), col("a", &schema)?], + // + // Three arguments + // + vec![col("a", &schema)?, col("b", &schema)?, col("c", &schema)?], + vec![col("a", &schema)?, col("b", &schema)?, lit(false)], + vec![col("a", &schema)?, lit(0.5), col("c", &schema)?], + vec![lit(3), col("b", &schema)?, col("c", &schema)?], + ]; + + for input_exprs in &test_cases { + let input_types = input_exprs + .iter() + .map(|expr: &Arc| expr.data_type(&schema).unwrap()) + .collect::>(); + let expr_args = ExpressionArgs::new(input_exprs, &input_types); + + let ret_exprs = udwf.expressions(expr_args); + + // Verify same number of input expressions are returned + assert_eq!( + input_exprs.len(), + ret_exprs.len(), + "\nInput expressions: {:?}\nReturned expressions: {:?}", + input_exprs, + ret_exprs + ); + + // Compares each returned expression with original input expressions + for (expected, actual) in input_exprs.iter().zip(&ret_exprs) { + assert_eq!( + format!("{expected:?}"), + format!("{actual:?}"), + "\nInput expressions: {:?}\nReturned expressions: {:?}", + input_exprs, + ret_exprs + ); + } + } + Ok(()) +} diff --git a/datafusion/expr/src/udwf.rs b/datafusion/expr/src/udwf.rs index 6ab94c1e841a..124625280670 100644 --- a/datafusion/expr/src/udwf.rs +++ b/datafusion/expr/src/udwf.rs @@ -312,10 +312,7 @@ pub trait WindowUDFImpl: Debug + Send + Sync { /// Returns the expressions that are passed to the [`PartitionEvaluator`]. 
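/// The default implementation returns all input expressions unchanged; implementations
/// that need to rewrite or reorder their arguments can override this method.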
fn expressions(&self, expr_args: ExpressionArgs) -> Vec> { - expr_args - .input_exprs() - .first() - .map_or(vec![], |expr| vec![Arc::clone(expr)]) + expr_args.input_exprs().into() } /// Invoke the function, returning the [`PartitionEvaluator`] instance From 7ae1ccb4971474d376e5bed1d7116fcaf23f906f Mon Sep 17 00:00:00 2001 From: JasonLi Date: Fri, 1 Nov 2024 02:11:32 +0800 Subject: [PATCH 16/32] Improve push down filter of join (#13184) * improve push down filter of join --- datafusion/optimizer/src/push_down_filter.rs | 337 ++++++++-- datafusion/optimizer/src/utils.rs | 8 + .../join_disable_repartition_joins.slt | 23 +- .../test_files/push_down_filter_join.slt | 612 ++++++++++++++++++ 4 files changed, 909 insertions(+), 71 deletions(-) create mode 100644 datafusion/sqllogictest/test_files/push_down_filter_join.slt diff --git a/datafusion/optimizer/src/push_down_filter.rs b/datafusion/optimizer/src/push_down_filter.rs index a0262d7d95df..3b3693cc9405 100644 --- a/datafusion/optimizer/src/push_down_filter.rs +++ b/datafusion/optimizer/src/push_down_filter.rs @@ -36,7 +36,9 @@ use datafusion_expr::{ }; use crate::optimizer::ApplyOrder; -use crate::utils::{has_all_column_refs, is_restrict_null_predicate}; +use crate::utils::{ + contain_all_columns, has_all_column_refs, is_restrict_null_predicate, +}; use crate::{OptimizerConfig, OptimizerRule}; /// Optimizer rule for pushing (moving) filter expressions down in a plan so @@ -214,19 +216,19 @@ impl<'a> ColumnChecker<'a> { } /// Return true if the expression references only columns from the left side of the join - fn is_left_only(&mut self, predicate: &Expr) -> bool { + fn left_only(&mut self, column_refs: &HashSet<&Column>) -> bool { if self.left_columns.is_none() { self.left_columns = Some(schema_columns(self.left_schema)); } - has_all_column_refs(predicate, self.left_columns.as_ref().unwrap()) + contain_all_columns(column_refs, self.left_columns.as_ref().unwrap()) } /// Return true if the expression references only columns from the right side of the join - fn is_right_only(&mut self, predicate: &Expr) -> bool { + fn right_only(&mut self, column_refs: &HashSet<&Column>) -> bool { if self.right_columns.is_none() { self.right_columns = Some(schema_columns(self.right_schema)); } - has_all_column_refs(predicate, self.right_columns.as_ref().unwrap()) + contain_all_columns(column_refs, self.right_columns.as_ref().unwrap()) } } @@ -410,10 +412,13 @@ fn extract_or_clause(expr: &Expr, schema_columns: &HashSet) -> Option, - inferred_join_predicates: Vec, + inferred_from_predicates: Vec, mut join: Join, on_filter: Vec, + inferred_from_on_filter: Vec, ) -> Result> { + assert_ne!(join.join_type, JoinType::Full); + let is_inner_join = join.join_type == JoinType::Inner; // Get pushable predicates from current optimizer state let (left_preserved, right_preserved) = lr_is_preserved(join.join_type); @@ -429,14 +434,24 @@ fn push_down_all_join( let mut keep_predicates = vec![]; let mut join_conditions = vec![]; let mut checker = ColumnChecker::new(left_schema, right_schema); + for predicate in predicates { - if left_preserved && checker.is_left_only(&predicate) { + let columns = predicate.column_refs(); + macro_rules! 
restrict_null { + () => {{ + let predicate_cloned = predicate.clone(); + let cols = columns.iter().cloned(); + is_restrict_null_predicate(predicate_cloned, cols).unwrap_or(false) + }}; + } + + if checker.left_only(&columns) && (left_preserved || restrict_null!()) { left_push.push(predicate); - } else if right_preserved && checker.is_right_only(&predicate) { + } else if checker.right_only(&columns) && (right_preserved || restrict_null!()) { right_push.push(predicate); } else if is_inner_join && can_evaluate_as_join_condition(&predicate)? { - // Here we do not differ it is eq or non-eq predicate, ExtractEquijoinPredicate will extract the eq predicate - // and convert to the join on condition + // Here we do not differ it is eq or non-eq predicate, ExtractEquijoinPredicate will + // extract the eq predicate and convert to the join on condition join_conditions.push(predicate); } else { keep_predicates.push(predicate); @@ -444,10 +459,13 @@ fn push_down_all_join( } // For infer predicates, if they can not push through join, just drop them - for predicate in inferred_join_predicates { - if left_preserved && checker.is_left_only(&predicate) { + // Because we check whether it is_restrict_null in the process of Infer, there is no need to + // check again + for predicate in inferred_from_predicates { + let columns = predicate.column_refs(); + if checker.left_only(&columns) { left_push.push(predicate); - } else if right_preserved && checker.is_right_only(&predicate) { + } else if checker.right_only(&columns) { right_push.push(predicate); } } @@ -455,15 +473,24 @@ fn push_down_all_join( let mut on_filter_join_conditions = vec![]; let (on_left_preserved, on_right_preserved) = on_lr_is_preserved(join.join_type); - if !on_filter.is_empty() { - for on in on_filter { - if on_left_preserved && checker.is_left_only(&on) { - left_push.push(on) - } else if on_right_preserved && checker.is_right_only(&on) { - right_push.push(on) - } else { - on_filter_join_conditions.push(on) - } + for on in on_filter { + let columns = on.column_refs(); + if on_left_preserved && checker.left_only(&columns) { + left_push.push(on) + } else if on_right_preserved && checker.right_only(&columns) { + right_push.push(on) + } else { + on_filter_join_conditions.push(on) + } + } + + // For infer predicates, if they can not push through join, just drop them + for on in inferred_from_on_filter { + let columns = on.column_refs(); + if on_left_preserved && checker.left_only(&columns) { + left_push.push(on) + } else if on_right_preserved && checker.right_only(&columns) { + right_push.push(on) } } @@ -519,6 +546,17 @@ fn push_down_join( join: Join, parent_predicate: Option<&Expr>, ) -> Result> { + if matches!(join.join_type, JoinType::Full) { + let plan = LogicalPlan::Join(join); + return Ok(match parent_predicate { + Some(predicate) => Transformed::yes(LogicalPlan::Filter(Filter::try_new( + predicate.clone(), + Arc::new(plan), + )?)), + None => Transformed::no(plan), + }); + } + // Split the parent predicate into individual conjunctive parts. let predicates = parent_predicate .map_or_else(Vec::new, |pred| split_conjunction_owned(pred.clone())); @@ -530,17 +568,24 @@ fn push_down_join( .map_or_else(Vec::new, |filter| split_conjunction_owned(filter.clone())); // Are there any new join predicates that can be inferred from the filter expressions? 
- let inferred_join_predicates = + let (inferred_from_predicates, inferred_from_on_filter) = infer_join_predicates(&join, &predicates, &on_filters)?; if on_filters.is_empty() + && inferred_from_on_filter.is_empty() && predicates.is_empty() - && inferred_join_predicates.is_empty() + && inferred_from_predicates.is_empty() { return Ok(Transformed::no(LogicalPlan::Join(join))); } - push_down_all_join(predicates, inferred_join_predicates, join, on_filters) + push_down_all_join( + predicates, + inferred_from_predicates, + join, + on_filters, + inferred_from_on_filter, + ) } /// Extracts any equi-join join predicates from the given filter expressions. @@ -553,11 +598,13 @@ fn push_down_join( /// * `on_filters` filters from the join ON clause that have not already been /// identified as join predicates /// +/// # Return Value +/// A tuple of Expr Vec - (inferred_from_predicates, inferred_from_on_filters). fn infer_join_predicates( join: &Join, predicates: &[Expr], on_filters: &[Expr], -) -> Result> { +) -> Result<(Vec, Vec)> { // Only allow both side key is column. let join_col_keys = join .on @@ -578,6 +625,7 @@ fn infer_join_predicates( predicates, &mut inferred_predicates, )?; + let inferred_from_predicates = inferred_predicates.take_all(); infer_join_predicates_from_on_filters( &join_col_keys, @@ -585,8 +633,9 @@ fn infer_join_predicates( on_filters, &mut inferred_predicates, )?; + let inferred_from_on_filters = inferred_predicates.predicates; - Ok(inferred_predicates.predicates) + Ok((inferred_from_predicates, inferred_from_on_filters)) } /// Inferred predicates collector. @@ -610,6 +659,12 @@ impl InferredPredicates { } } + fn take_all(&mut self) -> Vec { + let mut temp = vec![]; + std::mem::swap(&mut self.predicates, &mut temp); + temp + } + fn try_build_predicate( &mut self, predicate: Expr, @@ -2107,11 +2162,10 @@ mod tests { // filter not duplicated nor pushed down - i.e. noop let expected = "\ - Filter: test2.a <= Int64(1)\ - \n Left Join: Using test.a = test2.a\ - \n TableScan: test, full_filters=[test.a <= Int64(1)]\ - \n Projection: test2.a\ - \n TableScan: test2"; + Left Join: Using test.a = test2.a\ + \n TableScan: test, full_filters=[test.a <= Int64(1)]\ + \n Projection: test2.a\ + \n TableScan: test2, full_filters=[test2.a <= Int64(1)]"; assert_optimized_plan_eq(plan, expected) } @@ -2145,11 +2199,10 @@ mod tests { // filter not duplicated nor pushed down - i.e. 
noop let expected = "\ - Filter: test.a <= Int64(1)\ - \n Right Join: Using test.a = test2.a\ - \n TableScan: test\ - \n Projection: test2.a\ - \n TableScan: test2, full_filters=[test2.a <= Int64(1)]"; + Right Join: Using test.a = test2.a\ + \n TableScan: test, full_filters=[test.a <= Int64(1)]\ + \n Projection: test2.a\ + \n TableScan: test2, full_filters=[test2.a <= Int64(1)]"; assert_optimized_plan_eq(plan, expected) } @@ -2187,7 +2240,7 @@ mod tests { Left Join: Using test.a = test2.a\ \n TableScan: test, full_filters=[test.a <= Int64(1)]\ \n Projection: test2.a\ - \n TableScan: test2"; + \n TableScan: test2, full_filters=[test2.a <= Int64(1)]"; assert_optimized_plan_eq(plan, expected) } @@ -2223,7 +2276,7 @@ mod tests { // filter sent to right side of join, not duplicated to the left let expected = "\ Right Join: Using test.a = test2.a\ - \n TableScan: test\ + \n TableScan: test, full_filters=[test.a <= Int64(1)]\ \n Projection: test2.a\ \n TableScan: test2, full_filters=[test2.a <= Int64(1)]"; assert_optimized_plan_eq(plan, expected) @@ -2272,6 +2325,47 @@ mod tests { assert_optimized_plan_eq(plan, expected) } + #[test] + fn join_with_non_restrict_null_predicate() -> Result<()> { + let table_scan = test_table_scan()?; + let left = LogicalPlanBuilder::from(table_scan) + .project(vec![col("a"), col("b"), col("c")])? + .build()?; + let right_table_scan = test_table_scan_with_name("test2")?; + let right = LogicalPlanBuilder::from(right_table_scan) + .project(vec![col("a"), col("b"), col("c")])? + .build()?; + let filter = col("test.b").is_null().and(col("test2.b").is_null()); + let plan = LogicalPlanBuilder::from(left) + .join( + right, + JoinType::Inner, + (vec![Column::from_name("a")], vec![Column::from_name("a")]), + None, + )? + .filter(filter)? + .build()?; + + // not part of the test, just good to know: + assert_eq!( + format!("{plan}"), + "Filter: test.b IS NULL AND test2.b IS NULL\ + \n Inner Join: test.a = test2.a\ + \n Projection: test.a, test.b, test.c\ + \n TableScan: test\ + \n Projection: test2.a, test2.b, test2.c\ + \n TableScan: test2" + ); + + let expected = "\ + Inner Join: test.a = test2.a\ + \n Projection: test.a, test.b, test.c\ + \n TableScan: test, full_filters=[test.b IS NULL]\ + \n Projection: test2.a, test2.b, test2.c\ + \n TableScan: test2, full_filters=[test2.b IS NULL]"; + assert_optimized_plan_eq(plan, expected) + } + /// join filter should be completely removed after pushdown #[test] fn join_filter_removed() -> Result<()> { @@ -2393,7 +2487,49 @@ mod tests { \n Projection: test.a, test.b, test.c\ \n TableScan: test\ \n Projection: test2.a, test2.b, test2.c\ - \n TableScan: test2, full_filters=[test2.c > UInt32(4)]"; + \n TableScan: test2, full_filters=[test2.c > UInt32(4), test2.a > UInt32(1)]"; + assert_optimized_plan_eq(plan, expected) + } + + #[test] + fn left_join_with_non_restrict_null_predicate() -> Result<()> { + let table_scan = test_table_scan()?; + let left = LogicalPlanBuilder::from(table_scan) + .project(vec![col("a"), col("b"), col("c")])? + .build()?; + let right_table_scan = test_table_scan_with_name("test2")?; + let right = LogicalPlanBuilder::from(right_table_scan) + .project(vec![col("a"), col("b"), col("c")])? + .build()?; + let filter = col("test.b").is_null().and(col("test2.b").is_null()); + let plan = LogicalPlanBuilder::from(left) + .join( + right, + JoinType::Left, + (vec![Column::from_name("a")], vec![Column::from_name("a")]), + None, + )? + .filter(filter)? 
+ .build()?; + + // not part of the test, just good to know: + assert_eq!( + format!("{plan}"), + "Filter: test.b IS NULL AND test2.b IS NULL\ + \n Left Join: test.a = test2.a\ + \n Projection: test.a, test.b, test.c\ + \n TableScan: test\ + \n Projection: test2.a, test2.b, test2.c\ + \n TableScan: test2" + ); + + let expected = "\ + Filter: test2.b IS NULL\ + \n Left Join: test.a = test2.a\ + \n Projection: test.a, test.b, test.c\ + \n TableScan: test, full_filters=[test.b IS NULL]\ + \n Projection: test2.a, test2.b, test2.c\ + \n TableScan: test2"; assert_optimized_plan_eq(plan, expected) } @@ -2440,6 +2576,87 @@ mod tests { assert_optimized_plan_eq(plan, expected) } + #[test] + fn right_join_with_non_restrict_null_predicate() -> Result<()> { + let table_scan = test_table_scan()?; + let left = LogicalPlanBuilder::from(table_scan) + .project(vec![col("a"), col("b"), col("c")])? + .build()?; + let right_table_scan = test_table_scan_with_name("test2")?; + let right = LogicalPlanBuilder::from(right_table_scan) + .project(vec![col("a"), col("b"), col("c")])? + .build()?; + let filter = col("test.b").is_null().and(col("test2.b").is_null()); + let plan = LogicalPlanBuilder::from(left) + .join( + right, + JoinType::Right, + (vec![Column::from_name("a")], vec![Column::from_name("a")]), + None, + )? + .filter(filter)? + .build()?; + + // not part of the test, just good to know: + assert_eq!( + format!("{plan}"), + "Filter: test.b IS NULL AND test2.b IS NULL\ + \n Right Join: test.a = test2.a\ + \n Projection: test.a, test.b, test.c\ + \n TableScan: test\ + \n Projection: test2.a, test2.b, test2.c\ + \n TableScan: test2" + ); + + let expected = "\ + Filter: test.b IS NULL\ + \n Right Join: test.a = test2.a\ + \n Projection: test.a, test.b, test.c\ + \n TableScan: test\ + \n Projection: test2.a, test2.b, test2.c\ + \n TableScan: test2, full_filters=[test2.b IS NULL]"; + assert_optimized_plan_eq(plan, expected) + } + + #[test] + fn full_join() -> Result<()> { + let table_scan = test_table_scan()?; + let left = LogicalPlanBuilder::from(table_scan) + .project(vec![col("a"), col("b"), col("c")])? + .build()?; + let right_table_scan = test_table_scan_with_name("test2")?; + let right = LogicalPlanBuilder::from(right_table_scan) + .project(vec![col("a"), col("b"), col("c")])? + .build()?; + let filter = col("test.a") + .gt(lit(1u32)) + .and(col("test.b").lt(col("test2.b"))) + .and(col("test2.c").gt(lit(4u32))); + let plan = LogicalPlanBuilder::from(left) + .join( + right, + JoinType::Full, + (vec![Column::from_name("a")], vec![Column::from_name("a")]), + None, + )? + .filter(filter)? + .build()?; + + // not part of the test, just good to know: + assert_eq!( + format!("{plan}"), + "Filter: test.a > UInt32(1) AND test.b < test2.b AND test2.c > UInt32(4)\ + \n Full Join: test.a = test2.a\ + \n Projection: test.a, test.b, test.c\ + \n TableScan: test\ + \n Projection: test2.a, test2.b, test2.c\ + \n TableScan: test2" + ); + + let expected = &format!("{plan}"); + assert_optimized_plan_eq(plan, expected) + } + /// single table predicate parts of ON condition should not be pushed #[test] fn full_join_on_with_filter() -> Result<()> { @@ -2963,11 +3180,10 @@ Projection: a, b // Inferred the predicate `test1.a <= Int64(1)` and push it down to the left side. 
let expected = "\ - Filter: test2.a <= Int64(1)\ - \n LeftSemi Join: test1.a = test2.a\ - \n TableScan: test1, full_filters=[test1.a <= Int64(1)]\ - \n Projection: test2.a, test2.b\ - \n TableScan: test2"; + LeftSemi Join: test1.a = test2.a\ + \n TableScan: test1, full_filters=[test1.a <= Int64(1)]\ + \n Projection: test2.a, test2.b\ + \n TableScan: test2, full_filters=[test2.a <= Int64(1)]"; assert_optimized_plan_eq(plan, expected) } @@ -3044,11 +3260,10 @@ Projection: a, b // Inferred the predicate `test2.a <= Int64(1)` and push it down to the right side. let expected = "\ - Filter: test1.a <= Int64(1)\ - \n RightSemi Join: test1.a = test2.a\ - \n TableScan: test1\ - \n Projection: test2.a, test2.b\ - \n TableScan: test2, full_filters=[test2.a <= Int64(1)]"; + RightSemi Join: test1.a = test2.a\ + \n TableScan: test1, full_filters=[test1.a <= Int64(1)]\ + \n Projection: test2.a, test2.b\ + \n TableScan: test2, full_filters=[test2.a <= Int64(1)]"; assert_optimized_plan_eq(plan, expected) } @@ -3129,12 +3344,11 @@ Projection: a, b // For left anti, filter of the right side filter can be pushed down. let expected = "\ - Filter: test2.a > UInt32(2)\ - \n LeftAnti Join: test1.a = test2.a\ - \n Projection: test1.a, test1.b\ - \n TableScan: test1, full_filters=[test1.a > UInt32(2)]\ - \n Projection: test2.a, test2.b\ - \n TableScan: test2"; + LeftAnti Join: test1.a = test2.a\ + \n Projection: test1.a, test1.b\ + \n TableScan: test1, full_filters=[test1.a > UInt32(2)]\ + \n Projection: test2.a, test2.b\ + \n TableScan: test2, full_filters=[test2.a > UInt32(2)]"; assert_optimized_plan_eq(plan, expected) } @@ -3220,12 +3434,11 @@ Projection: a, b // For right anti, filter of the left side can be pushed down. let expected = "\ - Filter: test1.a > UInt32(2)\ - \n RightAnti Join: test1.a = test2.a\ - \n Projection: test1.a, test1.b\ - \n TableScan: test1\ - \n Projection: test2.a, test2.b\ - \n TableScan: test2, full_filters=[test2.a > UInt32(2)]"; + RightAnti Join: test1.a = test2.a\ + \n Projection: test1.a, test1.b\ + \n TableScan: test1, full_filters=[test1.a > UInt32(2)]\ + \n Projection: test2.a, test2.b\ + \n TableScan: test2, full_filters=[test2.a > UInt32(2)]"; assert_optimized_plan_eq(plan, expected) } diff --git a/datafusion/optimizer/src/utils.rs b/datafusion/optimizer/src/utils.rs index 9f325bc01b1d..9f8d7b7f97a0 100644 --- a/datafusion/optimizer/src/utils.rs +++ b/datafusion/optimizer/src/utils.rs @@ -79,6 +79,14 @@ pub fn optimize_children( /// Returns true if `expr` contains all columns in `schema_cols` pub(crate) fn has_all_column_refs(expr: &Expr, schema_cols: &HashSet) -> bool { let column_refs = expr.column_refs(); + contain_all_columns(&column_refs, schema_cols) +} + +/// Returns true if `column_refs` contains all columns in `schema_cols` +pub(crate) fn contain_all_columns( + column_refs: &HashSet<&Column>, + schema_cols: &HashSet, +) -> bool { // note can't use HashSet::intersect because of different types (owned vs References) schema_cols .iter() diff --git a/datafusion/sqllogictest/test_files/join_disable_repartition_joins.slt b/datafusion/sqllogictest/test_files/join_disable_repartition_joins.slt index c56c59b1bd78..eb47ccdd43ec 100644 --- a/datafusion/sqllogictest/test_files/join_disable_repartition_joins.slt +++ b/datafusion/sqllogictest/test_files/join_disable_repartition_joins.slt @@ -89,20 +89,25 @@ logical_plan 02)--Projection: t2.a AS a2, t2.b 03)----RightSemi Join: t1.d = t2.d, t1.c = t2.c 04)------SubqueryAlias: t1 -05)--------TableScan: annotated_data projection=[c, 
d] -06)------SubqueryAlias: t2 -07)--------Filter: annotated_data.d = Int32(3) -08)----------TableScan: annotated_data projection=[a, b, c, d], partial_filters=[annotated_data.d = Int32(3)] +05)--------Filter: annotated_data.d = Int32(3) +06)----------TableScan: annotated_data projection=[c, d], partial_filters=[annotated_data.d = Int32(3)] +07)------SubqueryAlias: t2 +08)--------Filter: annotated_data.d = Int32(3) +09)----------TableScan: annotated_data projection=[a, b, c, d], partial_filters=[annotated_data.d = Int32(3)] physical_plan 01)SortPreservingMergeExec: [a2@0 ASC NULLS LAST,b@1 ASC NULLS LAST], fetch=10 02)--ProjectionExec: expr=[a@0 as a2, b@1 as b] 03)----CoalesceBatchesExec: target_batch_size=8192, fetch=10 04)------HashJoinExec: mode=CollectLeft, join_type=RightSemi, on=[(d@1, d@3), (c@0, c@2)], projection=[a@0, b@1] -05)--------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[c, d], has_header=true -06)--------CoalesceBatchesExec: target_batch_size=8192 -07)----------FilterExec: d@3 = 3 -08)------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 -09)--------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a, b, c, d], output_ordering=[a@0 ASC NULLS LAST, b@1 ASC NULLS LAST, c@2 ASC NULLS LAST], has_header=true +05)--------CoalescePartitionsExec +06)----------CoalesceBatchesExec: target_batch_size=8192 +07)------------FilterExec: d@1 = 3 +08)--------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 +09)----------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[c, d], has_header=true +10)--------CoalesceBatchesExec: target_batch_size=8192 +11)----------FilterExec: d@3 = 3 +12)------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 +13)--------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a, b, c, d], output_ordering=[a@0 ASC NULLS LAST, b@1 ASC NULLS LAST, c@2 ASC NULLS LAST], has_header=true # preserve_right_semi_join query II nosort diff --git a/datafusion/sqllogictest/test_files/push_down_filter_join.slt b/datafusion/sqllogictest/test_files/push_down_filter_join.slt new file mode 100644 index 000000000000..f687c542a683 --- /dev/null +++ b/datafusion/sqllogictest/test_files/push_down_filter_join.slt @@ -0,0 +1,612 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +# Contents +# Inner Join +# `WHERE` clause +# `ON` clause +# Left Outer Join +# `WHERE` clause +# `ON` clause +# Right Outer Join +# `WHERE` clause +# `ON` clause +# Full Outer Join +# `WHERE` clause +# `ON` clause + +# Create table t1 +statement ok +CREATE TABLE t1(t1_id INT, t1_name VARCHAR) AS VALUES +(11, 'a'), +(22, 'b'), +(33, 'c'), +(44, 'd'), +(77, 'e'), +(88, NULL), +(99, NULL) + +# Create table t2 +statement ok +CREATE TABLE t2(t2_id INT, t2_name VARCHAR) AS VALUES +(11, 'z'), +(22, NULL), +(44, 'x'), +(55, 'w'), +(99, 'u') + +# Inner Join + +## `WHERE` clause + +query ITIT +SELECT * FROM t1 INNER JOIN t2 ON t1.t1_id = t2.t2_id ORDER BY t1_id, t1_name, t2_id, t2_name +---- +11 a 11 z +22 b 22 NULL +44 d 44 x +99 NULL 99 u + +query ITIT +SELECT * FROM t1 INNER JOIN t2 ON t1.t1_id = t2.t2_id WHERE t1.t1_name = 'a' ORDER BY t1_id, t1_name, t2_id, t2_name +---- +11 a 11 z + +query ITIT +SELECT * FROM t1 INNER JOIN t2 ON t1.t1_id = t2.t2_id WHERE t2.t2_name = 'z' ORDER BY t1_id, t1_name, t2_id, t2_name +---- +11 a 11 z + +query ITIT +SELECT * FROM t1 INNER JOIN t2 ON t1.t1_id = t2.t2_id WHERE t1.t1_name IS NULL ORDER BY t1_id, t1_name, t2_id, t2_name +---- +99 NULL 99 u + +query ITIT +SELECT * FROM t1 INNER JOIN t2 ON t1.t1_id = t2.t2_id WHERE t2.t2_name IS NULL ORDER BY t1_id, t1_name, t2_id, t2_name +---- +22 b 22 NULL + +query ITIT +SELECT * FROM t1 INNER JOIN t2 ON t1.t1_id = t2.t2_id WHERE t1.t1_id <= 22 ORDER BY t1_id, t1_name, t2_id, t2_name +---- +11 a 11 z +22 b 22 NULL + +query ITIT +SELECT * FROM t1 INNER JOIN t2 ON t1.t1_id = t2.t2_id WHERE t2.t2_id <= 22 ORDER BY t1_id, t1_name, t2_id, t2_name +---- +11 a 11 z +22 b 22 NULL + +query ITIT +SELECT * FROM t1 INNER JOIN t2 ON t1.t1_id = t2.t2_id WHERE t1.t1_id IS NULL ORDER BY t1_id, t1_name, t2_id, t2_name +---- + +query ITIT +SELECT * FROM t1 INNER JOIN t2 ON t1.t1_id = t2.t2_id WHERE t2.t2_id IS NULL ORDER BY t1_id, t1_name, t2_id, t2_name +---- + +## `ON` clause + +query ITIT +SELECT * FROM t1 INNER JOIN t2 ON t1.t1_id = t2.t2_id AND t1.t1_name = 'a' ORDER BY t1_id, t1_name, t2_id, t2_name +---- +11 a 11 z + +query ITIT +SELECT * FROM t1 INNER JOIN t2 ON t1.t1_id = t2.t2_id AND t2.t2_name = 'z' ORDER BY t1_id, t1_name, t2_id, t2_name +---- +11 a 11 z + +query ITIT +SELECT * FROM t1 INNER JOIN t2 ON t1.t1_id = t2.t2_id AND t1.t1_name IS NULL ORDER BY t1_id, t1_name, t2_id, t2_name +---- +99 NULL 99 u + +query ITIT +SELECT * FROM t1 INNER JOIN t2 ON t1.t1_id = t2.t2_id AND t2.t2_name IS NULL ORDER BY t1_id, t1_name, t2_id, t2_name +---- +22 b 22 NULL + +query ITIT +SELECT * FROM t1 INNER JOIN t2 ON t1.t1_id = t2.t2_id AND t1.t1_name IS NULL ORDER BY t1_id, t1_name, t2_id, t2_name +---- +99 NULL 99 u + +query ITIT +SELECT * FROM t1 INNER JOIN t2 ON t1.t1_id = t2.t2_id AND t2.t2_name IS NULL ORDER BY t1_id, t1_name, t2_id, t2_name +---- +22 b 22 NULL + +query ITIT +SELECT * FROM t1 INNER JOIN t2 ON t1.t1_id = t2.t2_id AND t1.t1_id <= 22 ORDER BY t1_id, t1_name, t2_id, t2_name +---- +11 a 11 z +22 b 22 NULL + +query ITIT +SELECT * FROM t1 INNER JOIN t2 ON t1.t1_id = t2.t2_id AND t2.t2_id <= 22 ORDER BY t1_id, t1_name, t2_id, t2_name +---- +11 a 11 z +22 b 22 NULL + +# Left Outer Join + +## `WHERE` clause + +query ITIT +SELECT * FROM t1 LEFT JOIN t2 ON t1.t1_id = t2.t2_id ORDER BY t1_id, t1_name, t2_id, t2_name +---- +11 a 11 z +22 b 22 NULL +33 c NULL NULL +44 d 44 x +77 e NULL NULL +88 NULL NULL NULL +99 NULL 99 u + +query ITIT +SELECT * FROM t1 LEFT JOIN t2 ON t1.t1_id = t2.t2_id WHERE t1.t1_name = 'a' ORDER BY t1_id, 
t1_name, t2_id, t2_name +---- +11 a 11 z + +query ITIT +SELECT * FROM t1 LEFT JOIN t2 ON t1.t1_id = t2.t2_id WHERE t2.t2_name = 'z' ORDER BY t1_id, t1_name, t2_id, t2_name +---- +11 a 11 z + +query ITIT +SELECT * FROM t1 LEFT JOIN t2 ON t1.t1_id = t2.t2_id WHERE t1.t1_name IS NULL ORDER BY t1_id, t1_name, t2_id, t2_name +---- +88 NULL NULL NULL +99 NULL 99 u + +query ITIT +SELECT * FROM t1 LEFT JOIN t2 ON t1.t1_id = t2.t2_id WHERE t2.t2_name IS NULL ORDER BY t1_id, t1_name, t2_id, t2_name +---- +22 b 22 NULL +33 c NULL NULL +77 e NULL NULL +88 NULL NULL NULL + +query ITIT +SELECT * FROM t1 LEFT JOIN t2 ON t1.t1_id = t2.t2_id WHERE t1.t1_id <= 22 ORDER BY t1_id, t1_name, t2_id, t2_name +---- +11 a 11 z +22 b 22 NULL + +query ITIT +SELECT * FROM t1 LEFT JOIN t2 ON t1.t1_id = t2.t2_id WHERE t2.t2_id <= 22 ORDER BY t1_id, t1_name, t2_id, t2_name +---- +11 a 11 z +22 b 22 NULL + +query ITIT +SELECT * FROM t1 LEFT JOIN t2 ON t1.t1_id = t2.t2_id WHERE t1.t1_id IS NULL ORDER BY t1_id, t1_name, t2_id, t2_name +---- + +query ITIT +SELECT * FROM t1 LEFT JOIN t2 ON t1.t1_id = t2.t2_id WHERE t2.t2_id IS NULL ORDER BY t1_id, t1_name, t2_id, t2_name +---- +33 c NULL NULL +77 e NULL NULL +88 NULL NULL NULL + +## `ON` clause + +query ITIT +SELECT * FROM t1 LEFT JOIN t2 ON t1.t1_id = t2.t2_id AND t1.t1_name = 'a' ORDER BY t1_id, t1_name, t2_id, t2_name +---- +11 a 11 z +22 b NULL NULL +33 c NULL NULL +44 d NULL NULL +77 e NULL NULL +88 NULL NULL NULL +99 NULL NULL NULL + +query ITIT +SELECT * FROM t1 LEFT JOIN t2 ON t1.t1_id = t2.t2_id AND t2.t2_name = 'z' ORDER BY t1_id, t1_name, t2_id, t2_name +---- +11 a 11 z +22 b NULL NULL +33 c NULL NULL +44 d NULL NULL +77 e NULL NULL +88 NULL NULL NULL +99 NULL NULL NULL + +query ITIT +SELECT * FROM t1 LEFT JOIN t2 ON t1.t1_id = t2.t2_id AND t1.t1_name IS NULL ORDER BY t1_id, t1_name, t2_id, t2_name +---- +11 a NULL NULL +22 b NULL NULL +33 c NULL NULL +44 d NULL NULL +77 e NULL NULL +88 NULL NULL NULL +99 NULL 99 u + +query ITIT +SELECT * FROM t1 LEFT JOIN t2 ON t1.t1_id = t2.t2_id AND t2.t2_name IS NULL ORDER BY t1_id, t1_name, t2_id, t2_name +---- +11 a NULL NULL +22 b 22 NULL +33 c NULL NULL +44 d NULL NULL +77 e NULL NULL +88 NULL NULL NULL +99 NULL NULL NULL + +query ITIT +SELECT * FROM t1 LEFT JOIN t2 ON t1.t1_id = t2.t2_id AND t1.t1_name IS NULL ORDER BY t1_id, t1_name, t2_id, t2_name +---- +11 a NULL NULL +22 b NULL NULL +33 c NULL NULL +44 d NULL NULL +77 e NULL NULL +88 NULL NULL NULL +99 NULL 99 u + +query ITIT +SELECT * FROM t1 LEFT JOIN t2 ON t1.t1_id = t2.t2_id AND t2.t2_name IS NULL ORDER BY t1_id, t1_name, t2_id, t2_name +---- +11 a NULL NULL +22 b 22 NULL +33 c NULL NULL +44 d NULL NULL +77 e NULL NULL +88 NULL NULL NULL +99 NULL NULL NULL + +query ITIT +SELECT * FROM t1 LEFT JOIN t2 ON t1.t1_id = t2.t2_id AND t1.t1_id <= 22 ORDER BY t1_id, t1_name, t2_id, t2_name +---- +11 a 11 z +22 b 22 NULL +33 c NULL NULL +44 d NULL NULL +77 e NULL NULL +88 NULL NULL NULL +99 NULL NULL NULL + +query ITIT +SELECT * FROM t1 LEFT JOIN t2 ON t1.t1_id = t2.t2_id AND t2.t2_id <= 22 ORDER BY t1_id, t1_name, t2_id, t2_name +---- +11 a 11 z +22 b 22 NULL +33 c NULL NULL +44 d NULL NULL +77 e NULL NULL +88 NULL NULL NULL +99 NULL NULL NULL + +# Right Outer Join + +## `WHERE` clause + +query ITIT +SELECT * FROM t1 RIGHT JOIN t2 ON t1.t1_id = t2.t2_id ORDER BY t1_id, t1_name, t2_id, t2_name +---- +11 a 11 z +22 b 22 NULL +44 d 44 x +99 NULL 99 u +NULL NULL 55 w + +query ITIT +SELECT * FROM t1 RIGHT JOIN t2 ON t1.t1_id = t2.t2_id WHERE t1.t1_name = 'a' ORDER BY t1_id, 
t1_name, t2_id, t2_name +---- +11 a 11 z + +query ITIT +SELECT * FROM t1 RIGHT JOIN t2 ON t1.t1_id = t2.t2_id WHERE t2.t2_name = 'z' ORDER BY t1_id, t1_name, t2_id, t2_name +---- +11 a 11 z + +query ITIT +SELECT * FROM t1 RIGHT JOIN t2 ON t1.t1_id = t2.t2_id WHERE t1.t1_name IS NULL ORDER BY t1_id, t1_name, t2_id, t2_name +---- +99 NULL 99 u +NULL NULL 55 w + +query ITIT +SELECT * FROM t1 RIGHT JOIN t2 ON t1.t1_id = t2.t2_id WHERE t2.t2_name IS NULL ORDER BY t1_id, t1_name, t2_id, t2_name +---- +22 b 22 NULL + +query ITIT +SELECT * FROM t1 RIGHT JOIN t2 ON t1.t1_id = t2.t2_id WHERE t1.t1_id <= 22 ORDER BY t1_id, t1_name, t2_id, t2_name +---- +11 a 11 z +22 b 22 NULL + +query ITIT +SELECT * FROM t1 RIGHT JOIN t2 ON t1.t1_id = t2.t2_id WHERE t2.t2_id <= 22 ORDER BY t1_id, t1_name, t2_id, t2_name +---- +11 a 11 z +22 b 22 NULL + +query ITIT +SELECT * FROM t1 RIGHT JOIN t2 ON t1.t1_id = t2.t2_id WHERE t1.t1_id IS NULL ORDER BY t1_id, t1_name, t2_id, t2_name +---- +NULL NULL 55 w + +query ITIT +SELECT * FROM t1 RIGHT JOIN t2 ON t1.t1_id = t2.t2_id WHERE t2.t2_id IS NULL ORDER BY t1_id, t1_name, t2_id, t2_name +---- + +## `ON` clause + +query ITIT +SELECT * FROM t1 RIGHT JOIN t2 ON t1.t1_id = t2.t2_id AND t1.t1_name = 'a' ORDER BY t1_id, t1_name, t2_id, t2_name +---- +11 a 11 z +NULL NULL 22 NULL +NULL NULL 44 x +NULL NULL 55 w +NULL NULL 99 u + +query ITIT +SELECT * FROM t1 RIGHT JOIN t2 ON t1.t1_id = t2.t2_id AND t2.t2_name = 'z' ORDER BY t1_id, t1_name, t2_id, t2_name +---- +11 a 11 z +NULL NULL 22 NULL +NULL NULL 44 x +NULL NULL 55 w +NULL NULL 99 u + +query ITIT +SELECT * FROM t1 RIGHT JOIN t2 ON t1.t1_id = t2.t2_id AND t1.t1_name IS NULL ORDER BY t1_id, t1_name, t2_id, t2_name +---- +99 NULL 99 u +NULL NULL 11 z +NULL NULL 22 NULL +NULL NULL 44 x +NULL NULL 55 w + +query ITIT +SELECT * FROM t1 RIGHT JOIN t2 ON t1.t1_id = t2.t2_id AND t2.t2_name IS NULL ORDER BY t1_id, t1_name, t2_id, t2_name +---- +22 b 22 NULL +NULL NULL 11 z +NULL NULL 44 x +NULL NULL 55 w +NULL NULL 99 u + +query ITIT +SELECT * FROM t1 RIGHT JOIN t2 ON t1.t1_id = t2.t2_id AND t1.t1_name IS NULL ORDER BY t1_id, t1_name, t2_id, t2_name +---- +99 NULL 99 u +NULL NULL 11 z +NULL NULL 22 NULL +NULL NULL 44 x +NULL NULL 55 w + +query ITIT +SELECT * FROM t1 RIGHT JOIN t2 ON t1.t1_id = t2.t2_id AND t2.t2_name IS NULL ORDER BY t1_id, t1_name, t2_id, t2_name +---- +22 b 22 NULL +NULL NULL 11 z +NULL NULL 44 x +NULL NULL 55 w +NULL NULL 99 u + +query ITIT +SELECT * FROM t1 RIGHT JOIN t2 ON t1.t1_id = t2.t2_id AND t1.t1_id <= 22 ORDER BY t1_id, t1_name, t2_id, t2_name +---- +11 a 11 z +22 b 22 NULL +NULL NULL 44 x +NULL NULL 55 w +NULL NULL 99 u + +query ITIT +SELECT * FROM t1 RIGHT JOIN t2 ON t1.t1_id = t2.t2_id AND t2.t2_id <= 22 ORDER BY t1_id, t1_name, t2_id, t2_name +---- +11 a 11 z +22 b 22 NULL +NULL NULL 44 x +NULL NULL 55 w +NULL NULL 99 u + +# Full Outer Join + +## `WHERE` clause + +query ITIT +SELECT * FROM t1 FULL JOIN t2 ON t1.t1_id = t2.t2_id ORDER BY t1_id, t1_name, t2_id, t2_name +---- +11 a 11 z +22 b 22 NULL +33 c NULL NULL +44 d 44 x +77 e NULL NULL +88 NULL NULL NULL +99 NULL 99 u +NULL NULL 55 w + +query ITIT +SELECT * FROM t1 FULL JOIN t2 ON t1.t1_id = t2.t2_id WHERE t1.t1_name = 'a' ORDER BY t1_id, t1_name, t2_id, t2_name +---- +11 a 11 z + +query ITIT +SELECT * FROM t1 FULL JOIN t2 ON t1.t1_id = t2.t2_id WHERE t2.t2_name = 'z' ORDER BY t1_id, t1_name, t2_id, t2_name +---- +11 a 11 z + +query ITIT +SELECT * FROM t1 FULL JOIN t2 ON t1.t1_id = t2.t2_id WHERE t1.t1_name IS NULL ORDER BY t1_id, t1_name, t2_id, 
t2_name +---- +88 NULL NULL NULL +99 NULL 99 u +NULL NULL 55 w + +query ITIT +SELECT * FROM t1 FULL JOIN t2 ON t1.t1_id = t2.t2_id WHERE t2.t2_name IS NULL ORDER BY t1_id, t1_name, t2_id, t2_name +---- +22 b 22 NULL +33 c NULL NULL +77 e NULL NULL +88 NULL NULL NULL + +query ITIT +SELECT * FROM t1 FULL JOIN t2 ON t1.t1_id = t2.t2_id WHERE t1.t1_id <= 22 ORDER BY t1_id, t1_name, t2_id, t2_name +---- +11 a 11 z +22 b 22 NULL + +query ITIT +SELECT * FROM t1 FULL JOIN t2 ON t1.t1_id = t2.t2_id WHERE t2.t2_id <= 22 ORDER BY t1_id, t1_name, t2_id, t2_name +---- +11 a 11 z +22 b 22 NULL + +query ITIT +SELECT * FROM t1 FULL JOIN t2 ON t1.t1_id = t2.t2_id WHERE t1.t1_id IS NULL ORDER BY t1_id, t1_name, t2_id, t2_name +---- +NULL NULL 55 w + +query ITIT +SELECT * FROM t1 FULL JOIN t2 ON t1.t1_id = t2.t2_id WHERE t2.t2_id IS NULL ORDER BY t1_id, t1_name, t2_id, t2_name +---- +33 c NULL NULL +77 e NULL NULL +88 NULL NULL NULL + +## `ON` clause + +query ITIT +SELECT * FROM t1 FULL JOIN t2 ON t1.t1_id = t2.t2_id AND t1.t1_name = 'a' ORDER BY t1_id, t1_name, t2_id, t2_name +---- +11 a 11 z +22 b NULL NULL +33 c NULL NULL +44 d NULL NULL +77 e NULL NULL +88 NULL NULL NULL +99 NULL NULL NULL +NULL NULL 22 NULL +NULL NULL 44 x +NULL NULL 55 w +NULL NULL 99 u + +query ITIT +SELECT * FROM t1 FULL JOIN t2 ON t1.t1_id = t2.t2_id AND t2.t2_name = 'z' ORDER BY t1_id, t1_name, t2_id, t2_name +---- +11 a 11 z +22 b NULL NULL +33 c NULL NULL +44 d NULL NULL +77 e NULL NULL +88 NULL NULL NULL +99 NULL NULL NULL +NULL NULL 22 NULL +NULL NULL 44 x +NULL NULL 55 w +NULL NULL 99 u + +query ITIT +SELECT * FROM t1 FULL JOIN t2 ON t1.t1_id = t2.t2_id AND t1.t1_name IS NULL ORDER BY t1_id, t1_name, t2_id, t2_name +---- +11 a NULL NULL +22 b NULL NULL +33 c NULL NULL +44 d NULL NULL +77 e NULL NULL +88 NULL NULL NULL +99 NULL 99 u +NULL NULL 11 z +NULL NULL 22 NULL +NULL NULL 44 x +NULL NULL 55 w + +query ITIT +SELECT * FROM t1 FULL JOIN t2 ON t1.t1_id = t2.t2_id AND t2.t2_name IS NULL ORDER BY t1_id, t1_name, t2_id, t2_name +---- +11 a NULL NULL +22 b 22 NULL +33 c NULL NULL +44 d NULL NULL +77 e NULL NULL +88 NULL NULL NULL +99 NULL NULL NULL +NULL NULL 11 z +NULL NULL 44 x +NULL NULL 55 w +NULL NULL 99 u + +query ITIT +SELECT * FROM t1 FULL JOIN t2 ON t1.t1_id = t2.t2_id AND t1.t1_name IS NULL ORDER BY t1_id, t1_name, t2_id, t2_name +---- +11 a NULL NULL +22 b NULL NULL +33 c NULL NULL +44 d NULL NULL +77 e NULL NULL +88 NULL NULL NULL +99 NULL 99 u +NULL NULL 11 z +NULL NULL 22 NULL +NULL NULL 44 x +NULL NULL 55 w + +query ITIT +SELECT * FROM t1 FULL JOIN t2 ON t1.t1_id = t2.t2_id AND t2.t2_name IS NULL ORDER BY t1_id, t1_name, t2_id, t2_name +---- +11 a NULL NULL +22 b 22 NULL +33 c NULL NULL +44 d NULL NULL +77 e NULL NULL +88 NULL NULL NULL +99 NULL NULL NULL +NULL NULL 11 z +NULL NULL 44 x +NULL NULL 55 w +NULL NULL 99 u + +query ITIT +SELECT * FROM t1 FULL JOIN t2 ON t1.t1_id = t2.t2_id AND t1.t1_id <= 22 ORDER BY t1_id, t1_name, t2_id, t2_name +---- +11 a 11 z +22 b 22 NULL +33 c NULL NULL +44 d NULL NULL +77 e NULL NULL +88 NULL NULL NULL +99 NULL NULL NULL +NULL NULL 44 x +NULL NULL 55 w +NULL NULL 99 u + +query ITIT +SELECT * FROM t1 FULL JOIN t2 ON t1.t1_id = t2.t2_id AND t2.t2_id <= 22 ORDER BY t1_id, t1_name, t2_id, t2_name +---- +11 a 11 z +22 b 22 NULL +33 c NULL NULL +44 d NULL NULL +77 e NULL NULL +88 NULL NULL NULL +99 NULL NULL NULL +NULL NULL 44 x +NULL NULL 55 w +NULL NULL 99 u From 2047d7fe8577d7c8fb079da6c0be1b544672ade2 Mon Sep 17 00:00:00 2001 From: Emil Ejbyfeldt Date: Thu, 31 Oct 2024 19:52:33 
+0100
Subject: [PATCH 17/32] feat: Implement LeftMark join to fix subquery
 correctness issue (#13134)

* Implement LeftMark join

In https://github.com/apache/datafusion/pull/12945 the emulation of a
mark join has a bug when there are duplicate values in the subquery.
This would be fixable by adding a distinct before the join, but this
patch instead implements a LeftMark join with the desired semantics and
uses that.

The LeftMark join will return a row for each row in the left input with
an additional column "mark" that is true if there was a match in the
right input and false otherwise.

Note: This patch does not implement the full null semantics for the
mark join described in
http://btw2017.informatik.uni-stuttgart.de/slidesandpapers/F1-10-37/paper_web.pdf
which will be needed if we add `ANY` subqueries. In the version in this
patch the mark column will only be true when the row had a match and
false when no match was found, never `null`.

* Use mark join in decorrelate subqueries

This fixes a correctness issue in the current approach.

* Add physical plan sqllogictest

* fmt

* Fix join type in doc comment

* Minor clean ups

* Add more documentation to LeftMark join

* Remove qualification

* fix doc

---------

Co-authored-by: Andrew Lamb
---
 .../common/src/functional_dependencies.rs     |   2 +-
 datafusion/common/src/join_type.rs            |  21 +++
 datafusion/core/src/dataframe/mod.rs          |   6 +-
 .../enforce_distribution.rs                   |  11 +-
 .../src/physical_optimizer/join_selection.rs  |   5 +
 .../src/physical_optimizer/sort_pushdown.rs   |   8 +-
 datafusion/core/tests/fuzz_cases/join_fuzz.rs |  24 +++
 datafusion/expr/src/logical_plan/builder.rs   |  24 +++
 datafusion/expr/src/logical_plan/plan.rs      |   8 +-
 datafusion/optimizer/src/analyzer/subquery.rs |   5 +-
 .../src/decorrelate_predicate_subquery.rs     |  52 ++----
 .../optimizer/src/optimize_projections/mod.rs |   6 +-
 datafusion/optimizer/src/push_down_filter.rs  |  15 +-
 datafusion/optimizer/src/push_down_limit.rs   |   2 +-
 .../physical-expr/src/equivalence/class.rs    |   2 +-
 .../physical-plan/src/joins/hash_join.rs      | 100 +++++++++++
 .../src/joins/nested_loop_join.rs             |  32 ++++
 .../src/joins/sort_merge_join.rs              | 163 ++++++++++++++----
 .../src/joins/symmetric_hash_join.rs          |  30 +++-
 datafusion/physical-plan/src/joins/utils.rs   | 114 ++++++------
 .../proto/datafusion_common.proto             |   4 +-
 datafusion/proto-common/src/from_proto/mod.rs |   1 +
 .../proto-common/src/generated/pbjson.rs      |   6 +
 .../proto-common/src/generated/prost.rs       |   6 +
 datafusion/proto-common/src/to_proto/mod.rs   |   1 +
 .../src/generated/datafusion_proto_common.rs  |   6 +
 .../proto/src/logical_plan/from_proto.rs      |   1 +
 datafusion/proto/src/logical_plan/to_proto.rs |   1 +
 datafusion/sql/src/unparser/plan.rs           |   9 +-
 .../sqllogictest/test_files/subquery.slt      |  98 +++++++----
 .../substrait/src/logical_plan/consumer.rs    |   1 +
 .../substrait/src/logical_plan/producer.rs    |   5 +-
 .../tests/cases/roundtrip_logical_plan.rs     |  18 +-
 33 files changed, 592 insertions(+), 195 deletions(-)

diff --git a/datafusion/common/src/functional_dependencies.rs b/datafusion/common/src/functional_dependencies.rs
index ed9a68c19536..31eafc744390 100644
--- a/datafusion/common/src/functional_dependencies.rs
+++ b/datafusion/common/src/functional_dependencies.rs
@@ -334,7 +334,7 @@ impl FunctionalDependencies {
                 left_func_dependencies.extend(right_func_dependencies);
                 left_func_dependencies
             }
-            JoinType::LeftSemi | JoinType::LeftAnti => {
+            JoinType::LeftSemi | JoinType::LeftAnti | JoinType::LeftMark => {
                 // These joins preserve functional dependencies of the left side:
                 left_func_dependencies
            }
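As a rough sketch of the semantics this patch introduces (the query and plan text below are illustrative, assuming the rewrite produced by DecorrelatePredicateSubquery for an EXISTS inside an OR predicate; the exact display may differ), a query such as

    -- illustrative query; t1(id) and t2(id) are assumed example tables
    SELECT t1.id
    FROM t1
    WHERE t1.id < 0 OR EXISTS (SELECT t2.id FROM t2 WHERE t2.id = t1.id);

is decorrelated into roughly

    Projection: t1.id
      Filter: t1.id < 0 OR __correlated_sq_1.mark
        LeftMark Join:  Filter: t1.id = __correlated_sq_1.id
          TableScan: t1
          SubqueryAlias: __correlated_sq_1
            Projection: t2.id
              TableScan: t2

The LeftMark join emits exactly one output row per t1 row together with a boolean mark column, so duplicate matching values in t2 no longer multiply t1 rows the way the previous LEFT JOIN emulation could.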
diff --git a/datafusion/common/src/join_type.rs b/datafusion/common/src/join_type.rs index d502e7836da3..e98f34199b27 100644 --- a/datafusion/common/src/join_type.rs +++ b/datafusion/common/src/join_type.rs @@ -44,6 +44,20 @@ pub enum JoinType { LeftAnti, /// Right Anti Join RightAnti, + /// Left Mark join + /// + /// Returns one record for each record from the left input. The output contains an additional + /// column "mark" which is true if there is at least one match in the right input where the + /// join condition evaluates to true. Otherwise, the mark column is false. For more details see + /// [1]. This join type is used to decorrelate EXISTS subqueries used inside disjunctive + /// predicates. + /// + /// Note: This we currently do not implement the full null semantics for the mark join described + /// in [1] which will be needed if we and ANY subqueries. In our version the mark column will + /// only be true for had a match and false when no match was found, never null. + /// + /// [1]: http://btw2017.informatik.uni-stuttgart.de/slidesandpapers/F1-10-37/paper_web.pdf + LeftMark, } impl JoinType { @@ -63,6 +77,7 @@ impl Display for JoinType { JoinType::RightSemi => "RightSemi", JoinType::LeftAnti => "LeftAnti", JoinType::RightAnti => "RightAnti", + JoinType::LeftMark => "LeftMark", }; write!(f, "{join_type}") } @@ -82,6 +97,7 @@ impl FromStr for JoinType { "RIGHTSEMI" => Ok(JoinType::RightSemi), "LEFTANTI" => Ok(JoinType::LeftAnti), "RIGHTANTI" => Ok(JoinType::RightAnti), + "LEFTMARK" => Ok(JoinType::LeftMark), _ => _not_impl_err!("The join type {s} does not exist or is not implemented"), } } @@ -101,6 +117,7 @@ impl Display for JoinSide { match self { JoinSide::Left => write!(f, "left"), JoinSide::Right => write!(f, "right"), + JoinSide::None => write!(f, "none"), } } } @@ -113,6 +130,9 @@ pub enum JoinSide { Left, /// Right side of the join Right, + /// Neither side of the join, used for Mark joins where the mark column does not belong to + /// either side of the join + None, } impl JoinSide { @@ -121,6 +141,7 @@ impl JoinSide { match self { JoinSide::Left => JoinSide::Right, JoinSide::Right => JoinSide::Left, + JoinSide::None => JoinSide::None, } } } diff --git a/datafusion/core/src/dataframe/mod.rs b/datafusion/core/src/dataframe/mod.rs index e5d352a63c7a..2c71cb80d755 100644 --- a/datafusion/core/src/dataframe/mod.rs +++ b/datafusion/core/src/dataframe/mod.rs @@ -3864,6 +3864,7 @@ mod tests { JoinType::RightSemi, JoinType::LeftAnti, JoinType::RightAnti, + JoinType::LeftMark, ]; let default_partition_count = SessionConfig::new().target_partitions(); @@ -3881,7 +3882,10 @@ mod tests { let join_schema = physical_plan.schema(); match join_type { - JoinType::Left | JoinType::LeftSemi | JoinType::LeftAnti => { + JoinType::Left + | JoinType::LeftSemi + | JoinType::LeftAnti + | JoinType::LeftMark => { let left_exprs: Vec> = vec![ Arc::new(Column::new_with_schema("c1", &join_schema)?), Arc::new(Column::new_with_schema("c2", &join_schema)?), diff --git a/datafusion/core/src/physical_optimizer/enforce_distribution.rs b/datafusion/core/src/physical_optimizer/enforce_distribution.rs index aa4bcb683749..ff8f16f4ee9c 100644 --- a/datafusion/core/src/physical_optimizer/enforce_distribution.rs +++ b/datafusion/core/src/physical_optimizer/enforce_distribution.rs @@ -328,7 +328,8 @@ fn adjust_input_keys_ordering( JoinType::Left | JoinType::LeftSemi | JoinType::LeftAnti - | JoinType::Full => vec![], + | JoinType::Full + | JoinType::LeftMark => vec![], }; } PartitionMode::Auto => { @@ -1959,6 
+1960,7 @@ pub(crate) mod tests { JoinType::Full, JoinType::LeftSemi, JoinType::LeftAnti, + JoinType::LeftMark, JoinType::RightSemi, JoinType::RightAnti, ]; @@ -1981,7 +1983,8 @@ pub(crate) mod tests { | JoinType::Right | JoinType::Full | JoinType::LeftSemi - | JoinType::LeftAnti => { + | JoinType::LeftAnti + | JoinType::LeftMark => { // Join on (a == c) let top_join_on = vec![( Arc::new(Column::new_with_schema("a", &join.schema()).unwrap()) @@ -1999,7 +2002,7 @@ pub(crate) mod tests { let expected = match join_type { // Should include 3 RepartitionExecs - JoinType::Inner | JoinType::Left | JoinType::LeftSemi | JoinType::LeftAnti => vec![ + JoinType::Inner | JoinType::Left | JoinType::LeftSemi | JoinType::LeftAnti | JoinType::LeftMark => vec![ top_join_plan.as_str(), join_plan.as_str(), "RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=10", @@ -2098,7 +2101,7 @@ pub(crate) mod tests { assert_optimized!(expected, top_join.clone(), true); assert_optimized!(expected, top_join, false); } - JoinType::LeftSemi | JoinType::LeftAnti => {} + JoinType::LeftSemi | JoinType::LeftAnti | JoinType::LeftMark => {} } } diff --git a/datafusion/core/src/physical_optimizer/join_selection.rs b/datafusion/core/src/physical_optimizer/join_selection.rs index 1c63df1f0281..2bf706f33d60 100644 --- a/datafusion/core/src/physical_optimizer/join_selection.rs +++ b/datafusion/core/src/physical_optimizer/join_selection.rs @@ -132,6 +132,9 @@ fn swap_join_type(join_type: JoinType) -> JoinType { JoinType::RightSemi => JoinType::LeftSemi, JoinType::LeftAnti => JoinType::RightAnti, JoinType::RightAnti => JoinType::LeftAnti, + JoinType::LeftMark => { + unreachable!("LeftMark join type does not support swapping") + } } } @@ -573,6 +576,7 @@ fn hash_join_convert_symmetric_subrule( hash_join.right().equivalence_properties(), hash_join.right().schema(), ), + JoinSide::None => return false, }; let name = schema.field(*index).name(); @@ -588,6 +592,7 @@ fn hash_join_convert_symmetric_subrule( match side { JoinSide::Left => hash_join.left().output_ordering(), JoinSide::Right => hash_join.right().output_ordering(), + JoinSide::None => unreachable!(), } .map(|p| p.to_vec()) }) diff --git a/datafusion/core/src/physical_optimizer/sort_pushdown.rs b/datafusion/core/src/physical_optimizer/sort_pushdown.rs index c7677d725b03..fdbda1fe52f7 100644 --- a/datafusion/core/src/physical_optimizer/sort_pushdown.rs +++ b/datafusion/core/src/physical_optimizer/sort_pushdown.rs @@ -384,6 +384,7 @@ fn try_pushdown_requirements_to_join( return Ok(None); } } + JoinSide::None => return Ok(None), }; let join_type = smj.join_type(); let probe_side = SortMergeJoinExec::probe_side(&join_type); @@ -410,6 +411,7 @@ fn try_pushdown_requirements_to_join( JoinSide::Right => { required_input_ordering[1] = new_req; } + JoinSide::None => unreachable!(), } required_input_ordering })) @@ -421,7 +423,11 @@ fn expr_source_side( left_columns_len: usize, ) -> Option { match join_type { - JoinType::Inner | JoinType::Left | JoinType::Right | JoinType::Full => { + JoinType::Inner + | JoinType::Left + | JoinType::Right + | JoinType::Full + | JoinType::LeftMark => { let all_column_sides = required_exprs .iter() .filter_map(|r| { diff --git a/datafusion/core/tests/fuzz_cases/join_fuzz.rs b/datafusion/core/tests/fuzz_cases/join_fuzz.rs index 5c03bc3a9110..d7a3460e4987 100644 --- a/datafusion/core/tests/fuzz_cases/join_fuzz.rs +++ b/datafusion/core/tests/fuzz_cases/join_fuzz.rs @@ -234,6 +234,30 @@ async fn test_anti_join_1k_filtered() { .await } +#[tokio::test] 
+async fn test_left_mark_join_1k() { + JoinFuzzTestCase::new( + make_staggered_batches(1000), + make_staggered_batches(1000), + JoinType::LeftMark, + None, + ) + .run_test(&[JoinTestType::HjSmj, JoinTestType::NljHj], false) + .await +} + +#[tokio::test] +async fn test_left_mark_join_1k_filtered() { + JoinFuzzTestCase::new( + make_staggered_batches(1000), + make_staggered_batches(1000), + JoinType::LeftMark, + Some(Box::new(col_lt_col_filter)), + ) + .run_test(&[JoinTestType::HjSmj, JoinTestType::NljHj], false) + .await +} + type JoinFilterBuilder = Box, Arc) -> JoinFilter>; struct JoinFuzzTestCase { diff --git a/datafusion/expr/src/logical_plan/builder.rs b/datafusion/expr/src/logical_plan/builder.rs index e50ffb59d24a..b7839c4873af 100644 --- a/datafusion/expr/src/logical_plan/builder.rs +++ b/datafusion/expr/src/logical_plan/builder.rs @@ -20,6 +20,7 @@ use std::any::Any; use std::cmp::Ordering; use std::collections::{HashMap, HashSet}; +use std::iter::once; use std::sync::Arc; use crate::dml::CopyTo; @@ -1326,6 +1327,25 @@ pub fn change_redundant_column(fields: &Fields) -> Vec { }) .collect() } + +fn mark_field(schema: &DFSchema) -> (Option, Arc) { + let mut table_references = schema + .iter() + .filter_map(|(qualifier, _)| qualifier) + .collect::>(); + table_references.dedup(); + let table_reference = if table_references.len() == 1 { + table_references.pop().cloned() + } else { + None + }; + + ( + table_reference, + Arc::new(Field::new("mark", DataType::Boolean, false)), + ) +} + /// Creates a schema for a join operation. /// The fields from the left side are first pub fn build_join_schema( @@ -1392,6 +1412,10 @@ pub fn build_join_schema( .map(|(q, f)| (q.cloned(), Arc::clone(f))) .collect() } + JoinType::LeftMark => left_fields + .map(|(q, f)| (q.cloned(), Arc::clone(f))) + .chain(once(mark_field(right))) + .collect(), JoinType::RightSemi | JoinType::RightAnti => { // Only use the right side for the schema right_fields diff --git a/datafusion/expr/src/logical_plan/plan.rs b/datafusion/expr/src/logical_plan/plan.rs index a301c48659d7..8ba2a44842bc 100644 --- a/datafusion/expr/src/logical_plan/plan.rs +++ b/datafusion/expr/src/logical_plan/plan.rs @@ -532,7 +532,9 @@ impl LogicalPlan { left.head_output_expr() } } - JoinType::LeftSemi | JoinType::LeftAnti => left.head_output_expr(), + JoinType::LeftSemi | JoinType::LeftAnti | JoinType::LeftMark => { + left.head_output_expr() + } JoinType::RightSemi | JoinType::RightAnti => right.head_output_expr(), }, LogicalPlan::RecursiveQuery(RecursiveQuery { static_term, .. }) => { @@ -1290,7 +1292,9 @@ impl LogicalPlan { _ => None, } } - JoinType::LeftSemi | JoinType::LeftAnti => left.max_rows(), + JoinType::LeftSemi | JoinType::LeftAnti | JoinType::LeftMark => { + left.max_rows() + } JoinType::RightSemi | JoinType::RightAnti => right.max_rows(), }, LogicalPlan::Repartition(Repartition { input, .. 
}) => input.max_rows(), diff --git a/datafusion/optimizer/src/analyzer/subquery.rs b/datafusion/optimizer/src/analyzer/subquery.rs index 0ffc954388f5..fa04835f0967 100644 --- a/datafusion/optimizer/src/analyzer/subquery.rs +++ b/datafusion/optimizer/src/analyzer/subquery.rs @@ -181,7 +181,10 @@ fn check_inner_plan(inner_plan: &LogicalPlan, can_contain_outer_ref: bool) -> Re })?; Ok(()) } - JoinType::Left | JoinType::LeftSemi | JoinType::LeftAnti => { + JoinType::Left + | JoinType::LeftSemi + | JoinType::LeftAnti + | JoinType::LeftMark => { check_inner_plan(left, can_contain_outer_ref)?; check_inner_plan(right, false) } diff --git a/datafusion/optimizer/src/decorrelate_predicate_subquery.rs b/datafusion/optimizer/src/decorrelate_predicate_subquery.rs index cc1687cffe92..7fdad5ba4b6e 100644 --- a/datafusion/optimizer/src/decorrelate_predicate_subquery.rs +++ b/datafusion/optimizer/src/decorrelate_predicate_subquery.rs @@ -17,7 +17,6 @@ //! [`DecorrelatePredicateSubquery`] converts `IN`/`EXISTS` subquery predicates to `SEMI`/`ANTI` joins use std::collections::BTreeSet; -use std::iter; use std::ops::Deref; use std::sync::Arc; @@ -34,11 +33,10 @@ use datafusion_expr::expr_rewriter::create_col_from_scalar_expr; use datafusion_expr::logical_plan::{JoinType, Subquery}; use datafusion_expr::utils::{conjunction, split_conjunction_owned}; use datafusion_expr::{ - exists, in_subquery, lit, not, not_exists, not_in_subquery, BinaryExpr, Expr, Filter, + exists, in_subquery, not, not_exists, not_in_subquery, BinaryExpr, Expr, Filter, LogicalPlan, LogicalPlanBuilder, Operator, }; -use itertools::chain; use log::debug; /// Optimizer rule for rewriting predicate(IN/EXISTS) subquery to left semi/anti joins @@ -138,17 +136,14 @@ fn rewrite_inner_subqueries( Expr::Exists(Exists { subquery: Subquery { subquery, .. }, negated, - }) => { - match existence_join(&cur_input, Arc::clone(&subquery), None, negated, alias)? - { - Some((plan, exists_expr)) => { - cur_input = plan; - Ok(Transformed::yes(exists_expr)) - } - None if negated => Ok(Transformed::no(not_exists(subquery))), - None => Ok(Transformed::no(exists(subquery))), + }) => match mark_join(&cur_input, Arc::clone(&subquery), None, negated, alias)? { + Some((plan, exists_expr)) => { + cur_input = plan; + Ok(Transformed::yes(exists_expr)) } - } + None if negated => Ok(Transformed::no(not_exists(subquery))), + None => Ok(Transformed::no(exists(subquery))), + }, Expr::InSubquery(InSubquery { expr, subquery: Subquery { subquery, .. }, @@ -159,7 +154,7 @@ fn rewrite_inner_subqueries( .map_or(plan_err!("single expression required."), |output_expr| { Ok(Expr::eq(*expr.clone(), output_expr)) })?; - match existence_join( + match mark_join( &cur_input, Arc::clone(&subquery), Some(in_predicate), @@ -283,10 +278,6 @@ fn build_join_top( build_join(left, subquery, in_predicate_opt, join_type, subquery_alias) } -/// Existence join is emulated by adding a non-nullable column to the subquery and using a left join -/// and checking if the column is null or not. If native support is added for Existence/Mark then -/// we should use that instead. -/// /// This is used to handle the case when the subquery is embedded in a more complex boolean /// expression like and OR. 
For example /// @@ -296,37 +287,26 @@ fn build_join_top( /// /// ```text /// Projection: t1.id -/// Filter: t1.id < 0 OR __correlated_sq_1.__exists IS NOT NULL -/// Left Join: Filter: t1.id = __correlated_sq_1.id +/// Filter: t1.id < 0 OR __correlated_sq_1.mark +/// LeftMark Join: Filter: t1.id = __correlated_sq_1.id /// TableScan: t1 /// SubqueryAlias: __correlated_sq_1 -/// Projection: t2.id, true as __exists +/// Projection: t2.id /// TableScan: t2 -fn existence_join( +fn mark_join( left: &LogicalPlan, subquery: Arc, in_predicate_opt: Option, negated: bool, alias_generator: &Arc, ) -> Result> { - // Add non nullable column to emulate existence join - let always_true_expr = lit(true).alias("__exists"); - let cols = chain( - subquery.schema().columns().into_iter().map(Expr::Column), - iter::once(always_true_expr), - ); - let subquery = LogicalPlanBuilder::from(subquery).project(cols)?.build()?; let alias = alias_generator.next("__correlated_sq"); - let exists_col = Expr::Column(Column::new(Some(alias.clone()), "__exists")); - let exists_expr = if negated { - exists_col.is_null() - } else { - exists_col.is_not_null() - }; + let exists_col = Expr::Column(Column::new(Some(alias.clone()), "mark")); + let exists_expr = if negated { !exists_col } else { exists_col }; Ok( - build_join(left, &subquery, in_predicate_opt, JoinType::Left, alias)? + build_join(left, &subquery, in_predicate_opt, JoinType::LeftMark, alias)? .map(|plan| (plan, exists_expr)), ) } diff --git a/datafusion/optimizer/src/optimize_projections/mod.rs b/datafusion/optimizer/src/optimize_projections/mod.rs index 42eff7100fbe..94c04d6328ed 100644 --- a/datafusion/optimizer/src/optimize_projections/mod.rs +++ b/datafusion/optimizer/src/optimize_projections/mod.rs @@ -677,7 +677,11 @@ fn split_join_requirements( ) -> (RequiredIndicies, RequiredIndicies) { match join_type { // In these cases requirements are split between left/right children: - JoinType::Inner | JoinType::Left | JoinType::Right | JoinType::Full => { + JoinType::Inner + | JoinType::Left + | JoinType::Right + | JoinType::Full + | JoinType::LeftMark => { // Decrease right side indices by `left_len` so that they point to valid // positions within the right child: indices.split_off(left_len) diff --git a/datafusion/optimizer/src/push_down_filter.rs b/datafusion/optimizer/src/push_down_filter.rs index 3b3693cc9405..1d0fc207cb51 100644 --- a/datafusion/optimizer/src/push_down_filter.rs +++ b/datafusion/optimizer/src/push_down_filter.rs @@ -163,7 +163,7 @@ pub(crate) fn lr_is_preserved(join_type: JoinType) -> (bool, bool) { JoinType::Full => (false, false), // No columns from the right side of the join can be referenced in output // predicates for semi/anti joins, so whether we specify t/f doesn't matter. - JoinType::LeftSemi | JoinType::LeftAnti => (true, false), + JoinType::LeftSemi | JoinType::LeftAnti | JoinType::LeftMark => (true, false), // No columns from the left side of the join can be referenced in output // predicates for semi/anti joins, so whether we specify t/f doesn't matter. 
JoinType::RightSemi | JoinType::RightAnti => (false, true), @@ -188,6 +188,7 @@ pub(crate) fn on_lr_is_preserved(join_type: JoinType) -> (bool, bool) { JoinType::LeftSemi | JoinType::RightSemi => (true, true), JoinType::LeftAnti => (false, true), JoinType::RightAnti => (true, false), + JoinType::LeftMark => (false, true), } } @@ -732,11 +733,13 @@ fn infer_join_predicates_from_on_filters( on_filters, inferred_predicates, ), - JoinType::Left | JoinType::LeftSemi => infer_join_predicates_impl::( - join_col_keys, - on_filters, - inferred_predicates, - ), + JoinType::Left | JoinType::LeftSemi | JoinType::LeftMark => { + infer_join_predicates_impl::( + join_col_keys, + on_filters, + inferred_predicates, + ) + } JoinType::Right | JoinType::RightSemi => { infer_join_predicates_impl::( join_col_keys, diff --git a/datafusion/optimizer/src/push_down_limit.rs b/datafusion/optimizer/src/push_down_limit.rs index ec7a0a1364b6..8a3aa4bb8459 100644 --- a/datafusion/optimizer/src/push_down_limit.rs +++ b/datafusion/optimizer/src/push_down_limit.rs @@ -248,7 +248,7 @@ fn push_down_join(mut join: Join, limit: usize) -> Transformed { let (left_limit, right_limit) = if is_no_join_condition(&join) { match join.join_type { Left | Right | Full | Inner => (Some(limit), Some(limit)), - LeftAnti | LeftSemi => (Some(limit), None), + LeftAnti | LeftSemi | LeftMark => (Some(limit), None), RightAnti | RightSemi => (None, Some(limit)), } } else { diff --git a/datafusion/physical-expr/src/equivalence/class.rs b/datafusion/physical-expr/src/equivalence/class.rs index c1851ddb22b5..7305bc1b0a2b 100644 --- a/datafusion/physical-expr/src/equivalence/class.rs +++ b/datafusion/physical-expr/src/equivalence/class.rs @@ -632,7 +632,7 @@ impl EquivalenceGroup { } result } - JoinType::LeftSemi | JoinType::LeftAnti => self.clone(), + JoinType::LeftSemi | JoinType::LeftAnti | JoinType::LeftMark => self.clone(), JoinType::RightSemi | JoinType::RightAnti => right_equivalences.clone(), } } diff --git a/datafusion/physical-plan/src/joins/hash_join.rs b/datafusion/physical-plan/src/joins/hash_join.rs index 2d11e03814a3..c56c179c17eb 100644 --- a/datafusion/physical-plan/src/joins/hash_join.rs +++ b/datafusion/physical-plan/src/joins/hash_join.rs @@ -524,6 +524,7 @@ impl HashJoinExec { | JoinType::Full | JoinType::LeftAnti | JoinType::LeftSemi + | JoinType::LeftMark )); let mode = if pipeline_breaking { @@ -3091,6 +3092,94 @@ mod tests { Ok(()) } + #[apply(batch_sizes)] + #[tokio::test] + async fn join_left_mark(batch_size: usize) -> Result<()> { + let task_ctx = prepare_task_ctx(batch_size); + let left = build_table( + ("a1", &vec![1, 2, 3]), + ("b1", &vec![4, 5, 7]), // 7 does not exist on the right + ("c1", &vec![7, 8, 9]), + ); + let right = build_table( + ("a2", &vec![10, 20, 30]), + ("b1", &vec![4, 5, 6]), + ("c2", &vec![70, 80, 90]), + ); + let on = vec![( + Arc::new(Column::new_with_schema("b1", &left.schema())?) as _, + Arc::new(Column::new_with_schema("b1", &right.schema())?) 
as _, + )]; + + let (columns, batches) = join_collect( + Arc::clone(&left), + Arc::clone(&right), + on.clone(), + &JoinType::LeftMark, + false, + task_ctx, + ) + .await?; + assert_eq!(columns, vec!["a1", "b1", "c1", "mark"]); + + let expected = [ + "+----+----+----+-------+", + "| a1 | b1 | c1 | mark |", + "+----+----+----+-------+", + "| 1 | 4 | 7 | true |", + "| 2 | 5 | 8 | true |", + "| 3 | 7 | 9 | false |", + "+----+----+----+-------+", + ]; + assert_batches_sorted_eq!(expected, &batches); + + Ok(()) + } + + #[apply(batch_sizes)] + #[tokio::test] + async fn partitioned_join_left_mark(batch_size: usize) -> Result<()> { + let task_ctx = prepare_task_ctx(batch_size); + let left = build_table( + ("a1", &vec![1, 2, 3]), + ("b1", &vec![4, 5, 7]), // 7 does not exist on the right + ("c1", &vec![7, 8, 9]), + ); + let right = build_table( + ("a2", &vec![10, 20, 30, 40]), + ("b1", &vec![4, 4, 5, 6]), + ("c2", &vec![60, 70, 80, 90]), + ); + let on = vec![( + Arc::new(Column::new_with_schema("b1", &left.schema())?) as _, + Arc::new(Column::new_with_schema("b1", &right.schema())?) as _, + )]; + + let (columns, batches) = partitioned_join_collect( + Arc::clone(&left), + Arc::clone(&right), + on.clone(), + &JoinType::LeftMark, + false, + task_ctx, + ) + .await?; + assert_eq!(columns, vec!["a1", "b1", "c1", "mark"]); + + let expected = [ + "+----+----+----+-------+", + "| a1 | b1 | c1 | mark |", + "+----+----+----+-------+", + "| 1 | 4 | 7 | true |", + "| 2 | 5 | 8 | true |", + "| 3 | 7 | 9 | false |", + "+----+----+----+-------+", + ]; + assert_batches_sorted_eq!(expected, &batches); + + Ok(()) + } + #[test] fn join_with_hash_collision() -> Result<()> { let mut hashmap_left = RawTable::with_capacity(2); @@ -3476,6 +3565,15 @@ mod tests { "| 30 | 6 | 90 |", "+----+----+----+", ]; + let expected_left_mark = vec![ + "+----+----+----+-------+", + "| a1 | b1 | c1 | mark |", + "+----+----+----+-------+", + "| 1 | 4 | 7 | true |", + "| 2 | 5 | 8 | true |", + "| 3 | 7 | 9 | false |", + "+----+----+----+-------+", + ]; let test_cases = vec![ (JoinType::Inner, expected_inner), @@ -3486,6 +3584,7 @@ mod tests { (JoinType::LeftAnti, expected_left_anti), (JoinType::RightSemi, expected_right_semi), (JoinType::RightAnti, expected_right_anti), + (JoinType::LeftMark, expected_left_mark), ]; for (join_type, expected) in test_cases { @@ -3768,6 +3867,7 @@ mod tests { JoinType::LeftAnti, JoinType::RightSemi, JoinType::RightAnti, + JoinType::LeftMark, ]; for join_type in join_types { diff --git a/datafusion/physical-plan/src/joins/nested_loop_join.rs b/datafusion/physical-plan/src/joins/nested_loop_join.rs index 358ff02473a6..957230f51372 100644 --- a/datafusion/physical-plan/src/joins/nested_loop_join.rs +++ b/datafusion/physical-plan/src/joins/nested_loop_join.rs @@ -1244,6 +1244,37 @@ pub(crate) mod tests { Ok(()) } + #[tokio::test] + async fn join_left_mark_with_filter() -> Result<()> { + let task_ctx = Arc::new(TaskContext::default()); + let left = build_left_table(); + let right = build_right_table(); + + let filter = prepare_join_filter(); + let (columns, batches) = multi_partitioned_join_collect( + left, + right, + &JoinType::LeftMark, + Some(filter), + task_ctx, + ) + .await?; + assert_eq!(columns, vec!["a1", "b1", "c1", "mark"]); + let expected = [ + "+----+----+-----+-------+", + "| a1 | b1 | c1 | mark |", + "+----+----+-----+-------+", + "| 11 | 8 | 110 | false |", + "| 5 | 5 | 50 | true |", + "| 9 | 8 | 90 | false |", + "+----+----+-----+-------+", + ]; + + assert_batches_sorted_eq!(expected, &batches); + + 
Ok(()) + } + #[tokio::test] async fn test_overallocation() -> Result<()> { let left = build_table( @@ -1269,6 +1300,7 @@ pub(crate) mod tests { JoinType::Full, JoinType::LeftSemi, JoinType::LeftAnti, + JoinType::LeftMark, JoinType::RightSemi, JoinType::RightAnti, ]; diff --git a/datafusion/physical-plan/src/joins/sort_merge_join.rs b/datafusion/physical-plan/src/joins/sort_merge_join.rs index b299b495c504..20fafcc34773 100644 --- a/datafusion/physical-plan/src/joins/sort_merge_join.rs +++ b/datafusion/physical-plan/src/joins/sort_merge_join.rs @@ -35,7 +35,9 @@ use std::sync::Arc; use std::task::{Context, Poll}; use arrow::array::*; -use arrow::compute::{self, concat_batches, filter_record_batch, take, SortOptions}; +use arrow::compute::{ + self, concat_batches, filter_record_batch, is_not_null, take, SortOptions, +}; use arrow::datatypes::{DataType, SchemaRef, TimeUnit}; use arrow::error::ArrowError; use arrow::ipc::reader::FileReader; @@ -178,7 +180,8 @@ impl SortMergeJoinExec { | JoinType::Left | JoinType::Full | JoinType::LeftAnti - | JoinType::LeftSemi => JoinSide::Left, + | JoinType::LeftSemi + | JoinType::LeftMark => JoinSide::Left, } } @@ -186,7 +189,10 @@ impl SortMergeJoinExec { fn maintains_input_order(join_type: JoinType) -> Vec { match join_type { JoinType::Inner => vec![true, false], - JoinType::Left | JoinType::LeftSemi | JoinType::LeftAnti => vec![true, false], + JoinType::Left + | JoinType::LeftSemi + | JoinType::LeftAnti + | JoinType::LeftMark => vec![true, false], JoinType::Right | JoinType::RightSemi | JoinType::RightAnti => { vec![false, true] } @@ -784,6 +790,29 @@ fn get_corrected_filter_mask( corrected_mask.extend(vec![Some(false); null_matched]); Some(corrected_mask.finish()) } + JoinType::LeftMark => { + for i in 0..row_indices_length { + let last_index = + last_index_for_row(i, row_indices, batch_ids, row_indices_length); + if filter_mask.value(i) && !seen_true { + seen_true = true; + corrected_mask.append_value(true); + } else if seen_true || !filter_mask.value(i) && !last_index { + corrected_mask.append_null(); // to be ignored and not set to output + } else { + corrected_mask.append_value(false); // to be converted to null joined row + } + + if last_index { + seen_true = false; + } + } + + // Generate null joined rows for records which have no matching join key + let null_matched = expected_size - corrected_mask.len(); + corrected_mask.extend(vec![Some(false); null_matched]); + Some(corrected_mask.finish()) + } JoinType::LeftSemi => { for i in 0..row_indices_length { let last_index = @@ -860,6 +889,7 @@ impl Stream for SMJStream { self.join_type, JoinType::Left | JoinType::LeftSemi + | JoinType::LeftMark | JoinType::Right | JoinType::LeftAnti ) @@ -943,6 +973,7 @@ impl Stream for SMJStream { | JoinType::LeftSemi | JoinType::Right | JoinType::LeftAnti + | JoinType::LeftMark ) { continue; @@ -964,6 +995,7 @@ impl Stream for SMJStream { | JoinType::LeftSemi | JoinType::Right | JoinType::LeftAnti + | JoinType::LeftMark ) { let out = self.filter_joined_batch()?; @@ -1264,6 +1296,8 @@ impl SMJStream { let mut join_streamed = false; // Whether to join buffered rows let mut join_buffered = false; + // For Mark join we store a dummy id to indicate the the row has a match + let mut mark_row_as_match = false; // determine whether we need to join streamed/buffered rows match self.current_ordering { @@ -1275,12 +1309,14 @@ impl SMJStream { | JoinType::RightSemi | JoinType::Full | JoinType::LeftAnti + | JoinType::LeftMark ) { join_streamed = !self.streamed_joined; } } 
Ordering::Equal => { - if matches!(self.join_type, JoinType::LeftSemi) { + if matches!(self.join_type, JoinType::LeftSemi | JoinType::LeftMark) { + mark_row_as_match = matches!(self.join_type, JoinType::LeftMark); // if the join filter is specified then its needed to output the streamed index // only if it has not been emitted before // the `join_filter_matched_idxs` keeps track on if streamed index has a successful @@ -1357,9 +1393,11 @@ impl SMJStream { } else { Some(self.buffered_data.scanning_batch_idx) }; + // For Mark join we store a dummy id to indicate the the row has a match + let scanning_idx = mark_row_as_match.then_some(0); self.streamed_batch - .append_output_pair(scanning_batch_idx, None); + .append_output_pair(scanning_batch_idx, scanning_idx); self.output_size += 1; self.buffered_data.scanning_finish(); self.streamed_joined = true; @@ -1461,24 +1499,25 @@ impl SMJStream { // The row indices of joined buffered batch let buffered_indices: UInt64Array = chunk.buffered_indices.finish(); - let mut buffered_columns = - if matches!(self.join_type, JoinType::LeftSemi | JoinType::LeftAnti) { - vec![] - } else if let Some(buffered_idx) = chunk.buffered_batch_idx { - get_buffered_columns( - &self.buffered_data, - buffered_idx, - &buffered_indices, - )? - } else { - // If buffered batch none, meaning it is null joined batch. - // We need to create null arrays for buffered columns to join with streamed rows. - self.buffered_schema - .fields() - .iter() - .map(|f| new_null_array(f.data_type(), buffered_indices.len())) - .collect::>() - }; + let mut buffered_columns = if matches!(self.join_type, JoinType::LeftMark) { + vec![Arc::new(is_not_null(&buffered_indices)?) as ArrayRef] + } else if matches!(self.join_type, JoinType::LeftSemi | JoinType::LeftAnti) { + vec![] + } else if let Some(buffered_idx) = chunk.buffered_batch_idx { + get_buffered_columns( + &self.buffered_data, + buffered_idx, + &buffered_indices, + )? + } else { + // If buffered batch none, meaning it is null joined batch. + // We need to create null arrays for buffered columns to join with streamed rows. 
+ create_unmatched_columns( + self.join_type, + &self.buffered_schema, + buffered_indices.len(), + ) + }; let streamed_columns_length = streamed_columns.len(); @@ -1489,7 +1528,7 @@ impl SMJStream { get_filter_column(&self.filter, &buffered_columns, &streamed_columns) } else if matches!( self.join_type, - JoinType::LeftSemi | JoinType::LeftAnti + JoinType::LeftSemi | JoinType::LeftAnti | JoinType::LeftMark ) { // unwrap is safe here as we check is_some on top of if statement let buffered_columns = get_buffered_columns( @@ -1517,7 +1556,6 @@ impl SMJStream { }; let output_batch = RecordBatch::try_new(Arc::clone(&self.schema), columns)?; - // Apply join filter if any if !filter_columns.is_empty() { if let Some(f) = &self.filter { @@ -1553,6 +1591,7 @@ impl SMJStream { | JoinType::LeftSemi | JoinType::Right | JoinType::LeftAnti + | JoinType::LeftMark ) { self.output_record_batches .batches @@ -1691,6 +1730,7 @@ impl SMJStream { | JoinType::LeftSemi | JoinType::Right | JoinType::LeftAnti + | JoinType::LeftMark )) { self.output_record_batches.batches.clear(); @@ -1721,16 +1761,18 @@ impl SMJStream { let buffered_columns_length = self.buffered_schema.fields.len(); let streamed_columns_length = self.streamed_schema.fields.len(); - if matches!(self.join_type, JoinType::Left | JoinType::Right) { + if matches!( + self.join_type, + JoinType::Left | JoinType::LeftMark | JoinType::Right + ) { let null_mask = compute::not(corrected_mask)?; let null_joined_batch = filter_record_batch(&record_batch, &null_mask)?; - let mut buffered_columns = self - .buffered_schema - .fields() - .iter() - .map(|f| new_null_array(f.data_type(), null_joined_batch.num_rows())) - .collect::>(); + let mut buffered_columns = create_unmatched_columns( + self.join_type, + &self.buffered_schema, + null_joined_batch.num_rows(), + ); let columns = if matches!(self.join_type, JoinType::Right) { let streamed_columns = null_joined_batch @@ -1777,6 +1819,22 @@ impl SMJStream { } } +fn create_unmatched_columns( + join_type: JoinType, + schema: &SchemaRef, + size: usize, +) -> Vec { + if matches!(join_type, JoinType::LeftMark) { + vec![Arc::new(BooleanArray::from(vec![false; size])) as ArrayRef] + } else { + schema + .fields() + .iter() + .map(|f| new_null_array(f.data_type(), size)) + .collect::>() + } +} + /// Gets the arrays which join filters are applied on. fn get_filter_column( join_filter: &Option, @@ -2716,6 +2774,39 @@ mod tests { Ok(()) } + #[tokio::test] + async fn join_left_mark() -> Result<()> { + let left = build_table( + ("a1", &vec![1, 2, 2, 3]), + ("b1", &vec![4, 5, 5, 7]), // 7 does not exist on the right + ("c1", &vec![7, 8, 8, 9]), + ); + let right = build_table( + ("a2", &vec![10, 20, 30, 40]), + ("b1", &vec![4, 4, 5, 6]), // 5 is double on the right + ("c2", &vec![60, 70, 80, 90]), + ); + let on = vec![( + Arc::new(Column::new_with_schema("b1", &left.schema())?) as _, + Arc::new(Column::new_with_schema("b1", &right.schema())?) 
as _, + )]; + + let (_, batches) = join_collect(left, right, on, LeftMark).await?; + let expected = [ + "+----+----+----+-------+", + "| a1 | b1 | c1 | mark |", + "+----+----+----+-------+", + "| 1 | 4 | 7 | true |", + "| 2 | 5 | 8 | true |", + "| 2 | 5 | 8 | true |", + "| 3 | 7 | 9 | false |", + "+----+----+----+-------+", + ]; + // The output order is important as SMJ preserves sortedness + assert_batches_eq!(expected, &batches); + Ok(()) + } + #[tokio::test] async fn join_with_duplicated_column_names() -> Result<()> { let left = build_table( @@ -3047,7 +3138,7 @@ mod tests { )]; let sort_options = vec![SortOptions::default(); on.len()]; - let join_types = vec![Inner, Left, Right, Full, LeftSemi, LeftAnti]; + let join_types = vec![Inner, Left, Right, Full, LeftSemi, LeftAnti, LeftMark]; // Disable DiskManager to prevent spilling let runtime = RuntimeEnvBuilder::new() @@ -3125,7 +3216,7 @@ mod tests { )]; let sort_options = vec![SortOptions::default(); on.len()]; - let join_types = vec![Inner, Left, Right, Full, LeftSemi, LeftAnti]; + let join_types = vec![Inner, Left, Right, Full, LeftSemi, LeftAnti, LeftMark]; // Disable DiskManager to prevent spilling let runtime = RuntimeEnvBuilder::new() @@ -3181,7 +3272,7 @@ mod tests { )]; let sort_options = vec![SortOptions::default(); on.len()]; - let join_types = [Inner, Left, Right, Full, LeftSemi, LeftAnti]; + let join_types = [Inner, Left, Right, Full, LeftSemi, LeftAnti, LeftMark]; // Enable DiskManager to allow spilling let runtime = RuntimeEnvBuilder::new() @@ -3282,7 +3373,7 @@ mod tests { )]; let sort_options = vec![SortOptions::default(); on.len()]; - let join_types = [Inner, Left, Right, Full, LeftSemi, LeftAnti]; + let join_types = [Inner, Left, Right, Full, LeftSemi, LeftAnti, LeftMark]; // Enable DiskManager to allow spilling let runtime = RuntimeEnvBuilder::new() diff --git a/datafusion/physical-plan/src/joins/symmetric_hash_join.rs b/datafusion/physical-plan/src/joins/symmetric_hash_join.rs index eb6a30d17e92..3e0cd48da2bf 100644 --- a/datafusion/physical-plan/src/joins/symmetric_hash_join.rs +++ b/datafusion/physical-plan/src/joins/symmetric_hash_join.rs @@ -62,6 +62,7 @@ use arrow::array::{ use arrow::compute::concat_batches; use arrow::datatypes::{Schema, SchemaRef}; use arrow::record_batch::RecordBatch; +use arrow_buffer::ArrowNativeType; use datafusion_common::hash_utils::create_hashes; use datafusion_common::utils::bisect; use datafusion_common::{internal_err, plan_err, JoinSide, JoinType, Result}; @@ -670,7 +671,11 @@ fn need_to_produce_result_in_final(build_side: JoinSide, join_type: JoinType) -> if build_side == JoinSide::Left { matches!( join_type, - JoinType::Left | JoinType::LeftAnti | JoinType::Full | JoinType::LeftSemi + JoinType::Left + | JoinType::LeftAnti + | JoinType::Full + | JoinType::LeftSemi + | JoinType::LeftMark ) } else { matches!( @@ -709,6 +714,20 @@ where { // Store the result in a tuple let result = match (build_side, join_type) { + (JoinSide::Left, JoinType::LeftMark) => { + let build_indices = (0..prune_length) + .map(L::Native::from_usize) + .collect::>(); + let probe_indices = (0..prune_length) + .map(|idx| { + // For mark join we output a dummy index 0 to indicate the row had a match + visited_rows + .contains(&(idx + deleted_offset)) + .then_some(R::Native::from_usize(0).unwrap()) + }) + .collect(); + (build_indices, probe_indices) + } // In the case of `Left` or `Right` join, or `Full` join, get the anti indices (JoinSide::Left, JoinType::Left | JoinType::LeftAnti) | (JoinSide::Right, 
JoinType::Right | JoinType::RightAnti) @@ -872,6 +891,7 @@ pub(crate) fn join_with_probe_batch( JoinType::LeftAnti | JoinType::RightAnti | JoinType::LeftSemi + | JoinType::LeftMark | JoinType::RightSemi ) { Ok(None) @@ -1707,6 +1727,7 @@ mod tests { JoinType::RightSemi, JoinType::LeftSemi, JoinType::LeftAnti, + JoinType::LeftMark, JoinType::RightAnti, JoinType::Full )] @@ -1791,6 +1812,7 @@ mod tests { JoinType::RightSemi, JoinType::LeftSemi, JoinType::LeftAnti, + JoinType::LeftMark, JoinType::RightAnti, JoinType::Full )] @@ -1855,6 +1877,7 @@ mod tests { JoinType::RightSemi, JoinType::LeftSemi, JoinType::LeftAnti, + JoinType::LeftMark, JoinType::RightAnti, JoinType::Full )] @@ -1906,6 +1929,7 @@ mod tests { JoinType::RightSemi, JoinType::LeftSemi, JoinType::LeftAnti, + JoinType::LeftMark, JoinType::RightAnti, JoinType::Full )] @@ -1933,6 +1957,7 @@ mod tests { JoinType::RightSemi, JoinType::LeftSemi, JoinType::LeftAnti, + JoinType::LeftMark, JoinType::RightAnti, JoinType::Full )] @@ -2298,6 +2323,7 @@ mod tests { JoinType::RightSemi, JoinType::LeftSemi, JoinType::LeftAnti, + JoinType::LeftMark, JoinType::RightAnti, JoinType::Full )] @@ -2380,6 +2406,7 @@ mod tests { JoinType::RightSemi, JoinType::LeftSemi, JoinType::LeftAnti, + JoinType::LeftMark, JoinType::RightAnti, JoinType::Full )] @@ -2454,6 +2481,7 @@ mod tests { JoinType::RightSemi, JoinType::LeftSemi, JoinType::LeftAnti, + JoinType::LeftMark, JoinType::RightAnti, JoinType::Full )] diff --git a/datafusion/physical-plan/src/joins/utils.rs b/datafusion/physical-plan/src/joins/utils.rs index 090cf9aa628a..e7c191f9835e 100644 --- a/datafusion/physical-plan/src/joins/utils.rs +++ b/datafusion/physical-plan/src/joins/utils.rs @@ -20,6 +20,7 @@ use std::collections::HashSet; use std::fmt::{self, Debug}; use std::future::Future; +use std::iter::once; use std::ops::{IndexMut, Range}; use std::sync::Arc; use std::task::{Context, Poll}; @@ -619,6 +620,7 @@ fn output_join_field(old_field: &Field, join_type: &JoinType, is_left: bool) -> JoinType::RightSemi => false, // doesn't introduce nulls JoinType::LeftAnti => false, // doesn't introduce nulls (or can it??) JoinType::RightAnti => false, // doesn't introduce nulls (or can it??) 
+ JoinType::LeftMark => false, }; if force_nullable { @@ -635,44 +637,10 @@ pub fn build_join_schema( right: &Schema, join_type: &JoinType, ) -> (Schema, Vec) { - let (fields, column_indices): (SchemaBuilder, Vec) = match join_type { - JoinType::Inner | JoinType::Left | JoinType::Full | JoinType::Right => { - let left_fields = left - .fields() - .iter() - .map(|f| output_join_field(f, join_type, true)) - .enumerate() - .map(|(index, f)| { - ( - f, - ColumnIndex { - index, - side: JoinSide::Left, - }, - ) - }); - let right_fields = right - .fields() - .iter() - .map(|f| output_join_field(f, join_type, false)) - .enumerate() - .map(|(index, f)| { - ( - f, - ColumnIndex { - index, - side: JoinSide::Right, - }, - ) - }); - - // left then right - left_fields.chain(right_fields).unzip() - } - JoinType::LeftSemi | JoinType::LeftAnti => left - .fields() + let left_fields = || { + left.fields() .iter() - .cloned() + .map(|f| output_join_field(f, join_type, true)) .enumerate() .map(|(index, f)| { ( @@ -683,11 +651,13 @@ pub fn build_join_schema( }, ) }) - .unzip(), - JoinType::RightSemi | JoinType::RightAnti => right + }; + + let right_fields = || { + right .fields() .iter() - .cloned() + .map(|f| output_join_field(f, join_type, false)) .enumerate() .map(|(index, f)| { ( @@ -698,7 +668,25 @@ pub fn build_join_schema( }, ) }) - .unzip(), + }; + + let (fields, column_indices): (SchemaBuilder, Vec) = match join_type { + JoinType::Inner | JoinType::Left | JoinType::Full | JoinType::Right => { + // left then right + left_fields().chain(right_fields()).unzip() + } + JoinType::LeftSemi | JoinType::LeftAnti => left_fields().unzip(), + JoinType::LeftMark => { + let right_field = once(( + Field::new("mark", arrow_schema::DataType::Boolean, false), + ColumnIndex { + index: 0, + side: JoinSide::None, + }, + )); + left_fields().chain(right_field).unzip() + } + JoinType::RightSemi | JoinType::RightAnti => right_fields().unzip(), }; let metadata = left @@ -902,6 +890,16 @@ fn estimate_join_cardinality( column_statistics: outer_stats.column_statistics, }) } + + JoinType::LeftMark => { + let num_rows = *left_stats.num_rows.get_value()?; + let mut column_statistics = left_stats.column_statistics; + column_statistics.push(ColumnStatistics::new_unknown()); + Some(PartialJoinStatistics { + num_rows, + column_statistics, + }) + } } } @@ -1153,7 +1151,11 @@ impl OnceFut { pub(crate) fn need_produce_result_in_final(join_type: JoinType) -> bool { matches!( join_type, - JoinType::Left | JoinType::LeftAnti | JoinType::LeftSemi | JoinType::Full + JoinType::Left + | JoinType::LeftAnti + | JoinType::LeftSemi + | JoinType::LeftMark + | JoinType::Full ) } @@ -1171,6 +1173,13 @@ pub(crate) fn get_final_indices_from_bit_map( join_type: JoinType, ) -> (UInt64Array, UInt32Array) { let left_size = left_bit_map.len(); + if join_type == JoinType::LeftMark { + let left_indices = (0..left_size as u64).collect::(); + let right_indices = (0..left_size) + .map(|idx| left_bit_map.get_bit(idx).then_some(0)) + .collect::(); + return (left_indices, right_indices); + } let left_indices = if join_type == JoinType::LeftSemi { (0..left_size) .filter_map(|idx| (left_bit_map.get_bit(idx)).then_some(idx as u64)) @@ -1254,7 +1263,10 @@ pub(crate) fn build_batch_from_indices( let mut columns: Vec> = Vec::with_capacity(schema.fields().len()); for column_index in column_indices { - let array = if column_index.side == build_side { + let array = if column_index.side == JoinSide::None { + // LeftMark join, the mark column is a true if the indices is not null, 
otherwise it will be false + Arc::new(compute::is_not_null(probe_indices)?) + } else if column_index.side == build_side { let array = build_input_buffer.column(column_index.index); if array.is_empty() || build_indices.null_count() == build_indices.len() { // Outer join would generate a null index when finding no match at our side. @@ -1323,7 +1335,7 @@ pub(crate) fn adjust_indices_by_join_type( // the left_indices will not be used later for the `right anti` join Ok((left_indices, right_indices)) } - JoinType::LeftSemi | JoinType::LeftAnti => { + JoinType::LeftSemi | JoinType::LeftAnti | JoinType::LeftMark => { // matched or unmatched left row will be produced in the end of loop // When visit the right batch, we can output the matched left row and don't need to wait the end of loop Ok(( @@ -1646,7 +1658,7 @@ pub(crate) fn symmetric_join_output_partitioning( let left_partitioning = left.output_partitioning(); let right_partitioning = right.output_partitioning(); match join_type { - JoinType::Left | JoinType::LeftSemi | JoinType::LeftAnti => { + JoinType::Left | JoinType::LeftSemi | JoinType::LeftAnti | JoinType::LeftMark => { left_partitioning.clone() } JoinType::RightSemi | JoinType::RightAnti => right_partitioning.clone(), @@ -1671,11 +1683,13 @@ pub(crate) fn asymmetric_join_output_partitioning( left.schema().fields().len(), ), JoinType::RightSemi | JoinType::RightAnti => right.output_partitioning().clone(), - JoinType::Left | JoinType::LeftSemi | JoinType::LeftAnti | JoinType::Full => { - Partitioning::UnknownPartitioning( - right.output_partitioning().partition_count(), - ) - } + JoinType::Left + | JoinType::LeftSemi + | JoinType::LeftAnti + | JoinType::Full + | JoinType::LeftMark => Partitioning::UnknownPartitioning( + right.output_partitioning().partition_count(), + ), } } diff --git a/datafusion/proto-common/proto/datafusion_common.proto b/datafusion/proto-common/proto/datafusion_common.proto index 7f8bce6b206e..65cd33d523cd 100644 --- a/datafusion/proto-common/proto/datafusion_common.proto +++ b/datafusion/proto-common/proto/datafusion_common.proto @@ -84,6 +84,7 @@ enum JoinType { LEFTANTI = 5; RIGHTSEMI = 6; RIGHTANTI = 7; + LEFTMARK = 8; } enum JoinConstraint { @@ -541,9 +542,10 @@ message ParquetOptions { string created_by = 16; } -enum JoinSide{ +enum JoinSide { LEFT_SIDE = 0; RIGHT_SIDE = 1; + NONE = 2; } message Precision{ diff --git a/datafusion/proto-common/src/from_proto/mod.rs b/datafusion/proto-common/src/from_proto/mod.rs index d848f795c684..a554e4ed2805 100644 --- a/datafusion/proto-common/src/from_proto/mod.rs +++ b/datafusion/proto-common/src/from_proto/mod.rs @@ -778,6 +778,7 @@ impl From for JoinSide { match t { protobuf::JoinSide::LeftSide => JoinSide::Left, protobuf::JoinSide::RightSide => JoinSide::Right, + protobuf::JoinSide::None => JoinSide::None, } } } diff --git a/datafusion/proto-common/src/generated/pbjson.rs b/datafusion/proto-common/src/generated/pbjson.rs index e8b46fbf7012..e8235ef7b9dd 100644 --- a/datafusion/proto-common/src/generated/pbjson.rs +++ b/datafusion/proto-common/src/generated/pbjson.rs @@ -3761,6 +3761,7 @@ impl serde::Serialize for JoinSide { let variant = match self { Self::LeftSide => "LEFT_SIDE", Self::RightSide => "RIGHT_SIDE", + Self::None => "NONE", }; serializer.serialize_str(variant) } @@ -3774,6 +3775,7 @@ impl<'de> serde::Deserialize<'de> for JoinSide { const FIELDS: &[&str] = &[ "LEFT_SIDE", "RIGHT_SIDE", + "NONE", ]; struct GeneratedVisitor; @@ -3816,6 +3818,7 @@ impl<'de> serde::Deserialize<'de> for JoinSide { match value { 
"LEFT_SIDE" => Ok(JoinSide::LeftSide), "RIGHT_SIDE" => Ok(JoinSide::RightSide), + "NONE" => Ok(JoinSide::None), _ => Err(serde::de::Error::unknown_variant(value, FIELDS)), } } @@ -3838,6 +3841,7 @@ impl serde::Serialize for JoinType { Self::Leftanti => "LEFTANTI", Self::Rightsemi => "RIGHTSEMI", Self::Rightanti => "RIGHTANTI", + Self::Leftmark => "LEFTMARK", }; serializer.serialize_str(variant) } @@ -3857,6 +3861,7 @@ impl<'de> serde::Deserialize<'de> for JoinType { "LEFTANTI", "RIGHTSEMI", "RIGHTANTI", + "LEFTMARK", ]; struct GeneratedVisitor; @@ -3905,6 +3910,7 @@ impl<'de> serde::Deserialize<'de> for JoinType { "LEFTANTI" => Ok(JoinType::Leftanti), "RIGHTSEMI" => Ok(JoinType::Rightsemi), "RIGHTANTI" => Ok(JoinType::Rightanti), + "LEFTMARK" => Ok(JoinType::Leftmark), _ => Err(serde::de::Error::unknown_variant(value, FIELDS)), } } diff --git a/datafusion/proto-common/src/generated/prost.rs b/datafusion/proto-common/src/generated/prost.rs index 939a4b3c2cd2..68e7f74c7f49 100644 --- a/datafusion/proto-common/src/generated/prost.rs +++ b/datafusion/proto-common/src/generated/prost.rs @@ -883,6 +883,7 @@ pub enum JoinType { Leftanti = 5, Rightsemi = 6, Rightanti = 7, + Leftmark = 8, } impl JoinType { /// String value of the enum field names used in the ProtoBuf definition. @@ -899,6 +900,7 @@ impl JoinType { Self::Leftanti => "LEFTANTI", Self::Rightsemi => "RIGHTSEMI", Self::Rightanti => "RIGHTANTI", + Self::Leftmark => "LEFTMARK", } } /// Creates an enum from field names used in the ProtoBuf definition. @@ -912,6 +914,7 @@ impl JoinType { "LEFTANTI" => Some(Self::Leftanti), "RIGHTSEMI" => Some(Self::Rightsemi), "RIGHTANTI" => Some(Self::Rightanti), + "LEFTMARK" => Some(Self::Leftmark), _ => None, } } @@ -1069,6 +1072,7 @@ impl CompressionTypeVariant { pub enum JoinSide { LeftSide = 0, RightSide = 1, + None = 2, } impl JoinSide { /// String value of the enum field names used in the ProtoBuf definition. @@ -1079,6 +1083,7 @@ impl JoinSide { match self { Self::LeftSide => "LEFT_SIDE", Self::RightSide => "RIGHT_SIDE", + Self::None => "NONE", } } /// Creates an enum from field names used in the ProtoBuf definition. @@ -1086,6 +1091,7 @@ impl JoinSide { match value { "LEFT_SIDE" => Some(Self::LeftSide), "RIGHT_SIDE" => Some(Self::RightSide), + "NONE" => Some(Self::None), _ => None, } } diff --git a/datafusion/proto-common/src/to_proto/mod.rs b/datafusion/proto-common/src/to_proto/mod.rs index f9b8973e2d41..02a642a4af93 100644 --- a/datafusion/proto-common/src/to_proto/mod.rs +++ b/datafusion/proto-common/src/to_proto/mod.rs @@ -759,6 +759,7 @@ impl From for protobuf::JoinSide { match t { JoinSide::Left => protobuf::JoinSide::LeftSide, JoinSide::Right => protobuf::JoinSide::RightSide, + JoinSide::None => protobuf::JoinSide::None, } } } diff --git a/datafusion/proto/src/generated/datafusion_proto_common.rs b/datafusion/proto/src/generated/datafusion_proto_common.rs index 939a4b3c2cd2..68e7f74c7f49 100644 --- a/datafusion/proto/src/generated/datafusion_proto_common.rs +++ b/datafusion/proto/src/generated/datafusion_proto_common.rs @@ -883,6 +883,7 @@ pub enum JoinType { Leftanti = 5, Rightsemi = 6, Rightanti = 7, + Leftmark = 8, } impl JoinType { /// String value of the enum field names used in the ProtoBuf definition. @@ -899,6 +900,7 @@ impl JoinType { Self::Leftanti => "LEFTANTI", Self::Rightsemi => "RIGHTSEMI", Self::Rightanti => "RIGHTANTI", + Self::Leftmark => "LEFTMARK", } } /// Creates an enum from field names used in the ProtoBuf definition. 
@@ -912,6 +914,7 @@ impl JoinType { "LEFTANTI" => Some(Self::Leftanti), "RIGHTSEMI" => Some(Self::Rightsemi), "RIGHTANTI" => Some(Self::Rightanti), + "LEFTMARK" => Some(Self::Leftmark), _ => None, } } @@ -1069,6 +1072,7 @@ impl CompressionTypeVariant { pub enum JoinSide { LeftSide = 0, RightSide = 1, + None = 2, } impl JoinSide { /// String value of the enum field names used in the ProtoBuf definition. @@ -1079,6 +1083,7 @@ impl JoinSide { match self { Self::LeftSide => "LEFT_SIDE", Self::RightSide => "RIGHT_SIDE", + Self::None => "NONE", } } /// Creates an enum from field names used in the ProtoBuf definition. @@ -1086,6 +1091,7 @@ impl JoinSide { match value { "LEFT_SIDE" => Some(Self::LeftSide), "RIGHT_SIDE" => Some(Self::RightSide), + "NONE" => Some(Self::None), _ => None, } } diff --git a/datafusion/proto/src/logical_plan/from_proto.rs b/datafusion/proto/src/logical_plan/from_proto.rs index 27bda7dd5ace..f25fb0bf2561 100644 --- a/datafusion/proto/src/logical_plan/from_proto.rs +++ b/datafusion/proto/src/logical_plan/from_proto.rs @@ -213,6 +213,7 @@ impl From for JoinType { protobuf::JoinType::Rightsemi => JoinType::RightSemi, protobuf::JoinType::Leftanti => JoinType::LeftAnti, protobuf::JoinType::Rightanti => JoinType::RightAnti, + protobuf::JoinType::Leftmark => JoinType::LeftMark, } } } diff --git a/datafusion/proto/src/logical_plan/to_proto.rs b/datafusion/proto/src/logical_plan/to_proto.rs index 5a6f3a32c668..8af7b19d9091 100644 --- a/datafusion/proto/src/logical_plan/to_proto.rs +++ b/datafusion/proto/src/logical_plan/to_proto.rs @@ -685,6 +685,7 @@ impl From for protobuf::JoinType { JoinType::RightSemi => protobuf::JoinType::Rightsemi, JoinType::LeftAnti => protobuf::JoinType::Leftanti, JoinType::RightAnti => protobuf::JoinType::Rightanti, + JoinType::LeftMark => protobuf::JoinType::Leftmark, } } } diff --git a/datafusion/sql/src/unparser/plan.rs b/datafusion/sql/src/unparser/plan.rs index 2c38a1d36c1e..6348aba49082 100644 --- a/datafusion/sql/src/unparser/plan.rs +++ b/datafusion/sql/src/unparser/plan.rs @@ -552,7 +552,7 @@ impl Unparser<'_> { relation, global: false, join_operator: self - .join_operator_to_sql(join.join_type, join_constraint), + .join_operator_to_sql(join.join_type, join_constraint)?, }; let mut from = select.pop_from().unwrap(); from.push_join(ast_join); @@ -855,8 +855,8 @@ impl Unparser<'_> { &self, join_type: JoinType, constraint: ast::JoinConstraint, - ) -> ast::JoinOperator { - match join_type { + ) -> Result { + Ok(match join_type { JoinType::Inner => ast::JoinOperator::Inner(constraint), JoinType::Left => ast::JoinOperator::LeftOuter(constraint), JoinType::Right => ast::JoinOperator::RightOuter(constraint), @@ -865,7 +865,8 @@ impl Unparser<'_> { JoinType::LeftSemi => ast::JoinOperator::LeftSemi(constraint), JoinType::RightAnti => ast::JoinOperator::RightAnti(constraint), JoinType::RightSemi => ast::JoinOperator::RightSemi(constraint), - } + JoinType::LeftMark => unimplemented!("Unparsing of Left Mark join type"), + }) } /// Convert the components of a USING clause to the USING AST. 
Returns diff --git a/datafusion/sqllogictest/test_files/subquery.slt b/datafusion/sqllogictest/test_files/subquery.slt index 36de19f1c3aa..027b5ca8dcfb 100644 --- a/datafusion/sqllogictest/test_files/subquery.slt +++ b/datafusion/sqllogictest/test_files/subquery.slt @@ -1056,13 +1056,11 @@ where t1.t1_id > 40 or t1.t1_id in (select t2.t2_id from t2 where t1.t1_int > 0) ---- logical_plan 01)Projection: t1.t1_id, t1.t1_name, t1.t1_int -02)--Filter: t1.t1_id > Int32(40) OR __correlated_sq_1.__exists IS NOT NULL -03)----Projection: t1.t1_id, t1.t1_name, t1.t1_int, __correlated_sq_1.__exists -04)------Left Join: t1.t1_id = __correlated_sq_1.t2_id Filter: t1.t1_int > Int32(0) -05)--------TableScan: t1 projection=[t1_id, t1_name, t1_int] -06)--------SubqueryAlias: __correlated_sq_1 -07)----------Projection: t2.t2_id, Boolean(true) AS __exists -08)------------TableScan: t2 projection=[t2_id] +02)--Filter: t1.t1_id > Int32(40) OR __correlated_sq_1.mark +03)----LeftMark Join: t1.t1_id = __correlated_sq_1.t2_id Filter: t1.t1_int > Int32(0) +04)------TableScan: t1 projection=[t1_id, t1_name, t1_int] +05)------SubqueryAlias: __correlated_sq_1 +06)--------TableScan: t2 projection=[t2_id] query ITI rowsort select t1.t1_id, @@ -1085,13 +1083,12 @@ where t1.t1_id = 11 or t1.t1_id + 12 not in (select t2.t2_id + 1 from t2 where t ---- logical_plan 01)Projection: t1.t1_id, t1.t1_name, t1.t1_int -02)--Filter: t1.t1_id = Int32(11) OR __correlated_sq_1.__exists IS NULL -03)----Projection: t1.t1_id, t1.t1_name, t1.t1_int, __correlated_sq_1.__exists -04)------Left Join: CAST(t1.t1_id AS Int64) + Int64(12) = __correlated_sq_1.t2.t2_id + Int64(1) Filter: t1.t1_int > Int32(0) -05)--------TableScan: t1 projection=[t1_id, t1_name, t1_int] -06)--------SubqueryAlias: __correlated_sq_1 -07)----------Projection: CAST(t2.t2_id AS Int64) + Int64(1), Boolean(true) AS __exists -08)------------TableScan: t2 projection=[t2_id] +02)--Filter: t1.t1_id = Int32(11) OR NOT __correlated_sq_1.mark +03)----LeftMark Join: CAST(t1.t1_id AS Int64) + Int64(12) = __correlated_sq_1.t2.t2_id + Int64(1) Filter: t1.t1_int > Int32(0) +04)------TableScan: t1 projection=[t1_id, t1_name, t1_int] +05)------SubqueryAlias: __correlated_sq_1 +06)--------Projection: CAST(t2.t2_id AS Int64) + Int64(1) +07)----------TableScan: t2 projection=[t2_id] query ITI rowsort select t1.t1_id, @@ -1113,13 +1110,11 @@ where t1.t1_id > 40 or exists (select * from t2 where t1.t1_id = t2.t2_id) ---- logical_plan 01)Projection: t1.t1_id, t1.t1_name, t1.t1_int -02)--Filter: t1.t1_id > Int32(40) OR __correlated_sq_1.__exists IS NOT NULL -03)----Projection: t1.t1_id, t1.t1_name, t1.t1_int, __correlated_sq_1.__exists -04)------Left Join: t1.t1_id = __correlated_sq_1.t2_id -05)--------TableScan: t1 projection=[t1_id, t1_name, t1_int] -06)--------SubqueryAlias: __correlated_sq_1 -07)----------Projection: t2.t2_id, Boolean(true) AS __exists -08)------------TableScan: t2 projection=[t2_id] +02)--Filter: t1.t1_id > Int32(40) OR __correlated_sq_1.mark +03)----LeftMark Join: t1.t1_id = __correlated_sq_1.t2_id +04)------TableScan: t1 projection=[t1_id, t1_name, t1_int] +05)------SubqueryAlias: __correlated_sq_1 +06)--------TableScan: t2 projection=[t2_id] query ITI rowsort select t1.t1_id, @@ -1132,6 +1127,9 @@ where t1.t1_id > 40 or exists (select * from t2 where t1.t1_id = t2.t2_id) 22 b 2 44 d 4 +statement ok +set datafusion.explain.logical_plan_only = false; + # not_exists_subquery_to_join_with_correlated_outer_filter_disjunction query TT explain select t1.t1_id, @@ -1142,13 
+1140,27 @@ where t1.t1_id > 40 or not exists (select * from t2 where t1.t1_id = t2.t2_id) ---- logical_plan 01)Projection: t1.t1_id, t1.t1_name, t1.t1_int -02)--Filter: t1.t1_id > Int32(40) OR __correlated_sq_1.__exists IS NULL -03)----Projection: t1.t1_id, t1.t1_name, t1.t1_int, __correlated_sq_1.__exists -04)------Left Join: t1.t1_id = __correlated_sq_1.t2_id -05)--------TableScan: t1 projection=[t1_id, t1_name, t1_int] -06)--------SubqueryAlias: __correlated_sq_1 -07)----------Projection: t2.t2_id, Boolean(true) AS __exists -08)------------TableScan: t2 projection=[t2_id] +02)--Filter: t1.t1_id > Int32(40) OR NOT __correlated_sq_1.mark +03)----LeftMark Join: t1.t1_id = __correlated_sq_1.t2_id +04)------TableScan: t1 projection=[t1_id, t1_name, t1_int] +05)------SubqueryAlias: __correlated_sq_1 +06)--------TableScan: t2 projection=[t2_id] +physical_plan +01)CoalesceBatchesExec: target_batch_size=2 +02)--FilterExec: t1_id@0 > 40 OR NOT mark@3, projection=[t1_id@0, t1_name@1, t1_int@2] +03)----CoalesceBatchesExec: target_batch_size=2 +04)------HashJoinExec: mode=Partitioned, join_type=LeftMark, on=[(t1_id@0, t2_id@0)] +05)--------CoalesceBatchesExec: target_batch_size=2 +06)----------RepartitionExec: partitioning=Hash([t1_id@0], 4), input_partitions=4 +07)------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 +08)--------------MemoryExec: partitions=1, partition_sizes=[1] +09)--------CoalesceBatchesExec: target_batch_size=2 +10)----------RepartitionExec: partitioning=Hash([t2_id@0], 4), input_partitions=4 +11)------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 +12)--------------MemoryExec: partitions=1, partition_sizes=[1] + +statement ok +set datafusion.explain.logical_plan_only = true; query ITI rowsort select t1.t1_id, @@ -1170,16 +1182,14 @@ where t1.t1_id in (select t3.t3_id from t3) and (t1.t1_id > 40 or t1.t1_id in (s ---- logical_plan 01)Projection: t1.t1_id, t1.t1_name, t1.t1_int -02)--Filter: t1.t1_id > Int32(40) OR __correlated_sq_2.__exists IS NOT NULL -03)----Projection: t1.t1_id, t1.t1_name, t1.t1_int, __correlated_sq_2.__exists -04)------Left Join: t1.t1_id = __correlated_sq_2.t2_id Filter: t1.t1_int > Int32(0) -05)--------LeftSemi Join: t1.t1_id = __correlated_sq_1.t3_id -06)----------TableScan: t1 projection=[t1_id, t1_name, t1_int] -07)----------SubqueryAlias: __correlated_sq_1 -08)------------TableScan: t3 projection=[t3_id] -09)--------SubqueryAlias: __correlated_sq_2 -10)----------Projection: t2.t2_id, Boolean(true) AS __exists -11)------------TableScan: t2 projection=[t2_id] +02)--Filter: t1.t1_id > Int32(40) OR __correlated_sq_2.mark +03)----LeftMark Join: t1.t1_id = __correlated_sq_2.t2_id Filter: t1.t1_int > Int32(0) +04)------LeftSemi Join: t1.t1_id = __correlated_sq_1.t3_id +05)--------TableScan: t1 projection=[t1_id, t1_name, t1_int] +06)--------SubqueryAlias: __correlated_sq_1 +07)----------TableScan: t3 projection=[t3_id] +08)------SubqueryAlias: __correlated_sq_2 +09)--------TableScan: t2 projection=[t2_id] query ITI rowsort select t1.t1_id, @@ -1192,6 +1202,18 @@ where t1.t1_id in (select t3.t3_id from t3) and (t1.t1_id > 40 or t1.t1_id in (s 22 b 2 44 d 4 +# Handle duplicate values in exists query +query ITI rowsort +select t1.t1_id, + t1.t1_name, + t1.t1_int +from t1 +where t1.t1_id > 40 or exists (select * from t2 cross join t3 where t1.t1_id = t2.t2_id) +---- +11 a 1 +22 b 2 +44 d 4 + # Nested subqueries query ITI rowsort select t1.t1_id, diff --git 
a/datafusion/substrait/src/logical_plan/consumer.rs b/datafusion/substrait/src/logical_plan/consumer.rs index 2aaf8ec0aa06..289aa7b7f448 100644 --- a/datafusion/substrait/src/logical_plan/consumer.rs +++ b/datafusion/substrait/src/logical_plan/consumer.rs @@ -1226,6 +1226,7 @@ fn from_substrait_jointype(join_type: i32) -> Result { join_rel::JoinType::Outer => Ok(JoinType::Full), join_rel::JoinType::LeftAnti => Ok(JoinType::LeftAnti), join_rel::JoinType::LeftSemi => Ok(JoinType::LeftSemi), + join_rel::JoinType::LeftMark => Ok(JoinType::LeftMark), _ => plan_err!("unsupported join type {substrait_join_type:?}"), } } else { diff --git a/datafusion/substrait/src/logical_plan/producer.rs b/datafusion/substrait/src/logical_plan/producer.rs index 408885f70687..c73029f130ad 100644 --- a/datafusion/substrait/src/logical_plan/producer.rs +++ b/datafusion/substrait/src/logical_plan/producer.rs @@ -725,7 +725,10 @@ fn to_substrait_jointype(join_type: JoinType) -> join_rel::JoinType { JoinType::Full => join_rel::JoinType::Outer, JoinType::LeftAnti => join_rel::JoinType::LeftAnti, JoinType::LeftSemi => join_rel::JoinType::LeftSemi, - JoinType::RightAnti | JoinType::RightSemi => unimplemented!(), + JoinType::LeftMark => join_rel::JoinType::LeftMark, + JoinType::RightAnti | JoinType::RightSemi => { + unimplemented!() + } } } diff --git a/datafusion/substrait/tests/cases/roundtrip_logical_plan.rs b/datafusion/substrait/tests/cases/roundtrip_logical_plan.rs index 04530dd34d4b..8fbdefe2852e 100644 --- a/datafusion/substrait/tests/cases/roundtrip_logical_plan.rs +++ b/datafusion/substrait/tests/cases/roundtrip_logical_plan.rs @@ -453,15 +453,15 @@ async fn roundtrip_inlist_5() -> Result<()> { // on roundtrip there is an additional projection during TableScan which includes all column of the table, // using assert_expected_plan here as a workaround assert_expected_plan( - "SELECT a, f FROM data WHERE (f IN ('a', 'b', 'c') OR a in (SELECT data2.a FROM data2 WHERE f IN ('b', 'c', 'd')))", - "Projection: data.a, data.f\ - \n Filter: data.f = Utf8(\"a\") OR data.f = Utf8(\"b\") OR data.f = Utf8(\"c\") OR Boolean(true) IS NOT NULL\ - \n Projection: data.a, data.f, Boolean(true)\ - \n Left Join: data.a = data2.a\ - \n TableScan: data projection=[a, f]\ - \n Projection: data2.a, Boolean(true)\ - \n Filter: data2.f = Utf8(\"b\") OR data2.f = Utf8(\"c\") OR data2.f = Utf8(\"d\")\ - \n TableScan: data2 projection=[a, f], partial_filters=[data2.f = Utf8(\"b\") OR data2.f = Utf8(\"c\") OR data2.f = Utf8(\"d\")]", + "SELECT a, f FROM data WHERE (f IN ('a', 'b', 'c') OR a in (SELECT data2.a FROM data2 WHERE f IN ('b', 'c', 'd')))", + + "Projection: data.a, data.f\ + \n Filter: data.f = Utf8(\"a\") OR data.f = Utf8(\"b\") OR data.f = Utf8(\"c\") OR data2.mark\ + \n LeftMark Join: data.a = data2.a\ + \n TableScan: data projection=[a, f]\ + \n Projection: data2.a\ + \n Filter: data2.f = Utf8(\"b\") OR data2.f = Utf8(\"c\") OR data2.f = Utf8(\"d\")\ + \n TableScan: data2 projection=[a, f], partial_filters=[data2.f = Utf8(\"b\") OR data2.f = Utf8(\"c\") OR data2.f = Utf8(\"d\")]", true).await } From f2bebcde8a2350b9f00705f7c2c0242359f7d948 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Thu, 31 Oct 2024 14:54:11 -0400 Subject: [PATCH 18/32] Minor: Reduce indirection for finding changlog (#13199) * Minor: Improve changelog links * revert changelog --- CHANGELOG.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ea0c339ac451..c481ce0b96a0 100644 --- a/CHANGELOG.md +++ 
b/CHANGELOG.md @@ -17,6 +17,7 @@ under the License. --> -* [DataFusion CHANGELOG](./datafusion/CHANGELOG.md) +Change logs for each release can be found [here](dev/changelog). + For older versions, see [apache/arrow/CHANGELOG.md](https://github.com/apache/arrow/blob/master/CHANGELOG.md). From d2a15b3f8a9394b4c2bf283de692f73aecc9e17c Mon Sep 17 00:00:00 2001 From: Jonah Gao Date: Fri, 1 Nov 2024 10:51:31 +0800 Subject: [PATCH 19/32] feat: support logical plan for `EXECUTE` statement (#13194) * Add Execute LogicalPlan * Fix compile * Add tests --- datafusion/core/src/physical_planner.rs | 3 ++ datafusion/expr/src/expr_rewriter/mod.rs | 1 + datafusion/expr/src/logical_plan/display.rs | 17 ++++++-- datafusion/expr/src/logical_plan/mod.rs | 2 +- datafusion/expr/src/logical_plan/plan.rs | 39 +++++++++++++++++++ datafusion/expr/src/logical_plan/tree_node.rs | 26 +++++++++++-- .../optimizer/src/common_subexpr_eliminate.rs | 3 +- .../optimizer/src/optimize_projections/mod.rs | 3 +- datafusion/proto/src/logical_plan/mod.rs | 3 ++ datafusion/sql/src/statement.rs | 26 ++++++++++++- datafusion/sql/src/unparser/plan.rs | 1 + .../sqllogictest/test_files/prepare.slt | 18 +++++++++ 12 files changed, 130 insertions(+), 12 deletions(-) diff --git a/datafusion/core/src/physical_planner.rs b/datafusion/core/src/physical_planner.rs index 5aa702550b08..c16f3ad104b8 100644 --- a/datafusion/core/src/physical_planner.rs +++ b/datafusion/core/src/physical_planner.rs @@ -1201,6 +1201,9 @@ impl DefaultPhysicalPlanner { // statement can be prepared) return not_impl_err!("Unsupported logical plan: Prepare"); } + LogicalPlan::Execute(_) => { + return not_impl_err!("Unsupported logical plan: Execute"); + } LogicalPlan::Dml(dml) => { // DataFusion is a read-only query engine, but also a library, so consumers may implement this return not_impl_err!("Unsupported logical plan: Dml({0})", dml.op); diff --git a/datafusion/expr/src/expr_rewriter/mod.rs b/datafusion/expr/src/expr_rewriter/mod.rs index d6d5c3e2931c..c86696854ca3 100644 --- a/datafusion/expr/src/expr_rewriter/mod.rs +++ b/datafusion/expr/src/expr_rewriter/mod.rs @@ -314,6 +314,7 @@ impl NamePreserver { | LogicalPlan::Join(_) | LogicalPlan::TableScan(_) | LogicalPlan::Limit(_) + | LogicalPlan::Execute(_) ), } } diff --git a/datafusion/expr/src/logical_plan/display.rs b/datafusion/expr/src/logical_plan/display.rs index c0549451a776..9aea7747c414 100644 --- a/datafusion/expr/src/logical_plan/display.rs +++ b/datafusion/expr/src/logical_plan/display.rs @@ -20,10 +20,10 @@ use std::collections::HashMap; use std::fmt; use crate::{ - expr_vec_fmt, Aggregate, DescribeTable, Distinct, DistinctOn, DmlStatement, Expr, - Filter, Join, Limit, LogicalPlan, Partitioning, Prepare, Projection, RecursiveQuery, - Repartition, Sort, Subquery, SubqueryAlias, TableProviderFilterPushDown, TableScan, - Unnest, Values, Window, + expr_vec_fmt, Aggregate, DescribeTable, Distinct, DistinctOn, DmlStatement, Execute, + Expr, Filter, Join, Limit, LogicalPlan, Partitioning, Prepare, Projection, + RecursiveQuery, Repartition, Sort, Subquery, SubqueryAlias, + TableProviderFilterPushDown, TableScan, Unnest, Values, Window, }; use crate::dml::CopyTo; @@ -626,6 +626,15 @@ impl<'a, 'b> PgJsonVisitor<'a, 'b> { "Data Types": format!("{:?}", data_types) }) } + LogicalPlan::Execute(Execute { + name, parameters, .. + }) => { + json!({ + "Node Type": "Execute", + "Name": name, + "Parameters": expr_vec_fmt!(parameters), + }) + } LogicalPlan::DescribeTable(DescribeTable { .. 
}) => { json!({ "Node Type": "DescribeTable" diff --git a/datafusion/expr/src/logical_plan/mod.rs b/datafusion/expr/src/logical_plan/mod.rs index 80a896212442..59654a227829 100644 --- a/datafusion/expr/src/logical_plan/mod.rs +++ b/datafusion/expr/src/logical_plan/mod.rs @@ -36,7 +36,7 @@ pub use ddl::{ pub use dml::{DmlStatement, WriteOp}; pub use plan::{ projection_schema, Aggregate, Analyze, ColumnUnnestList, DescribeTable, Distinct, - DistinctOn, EmptyRelation, Explain, Extension, FetchType, Filter, Join, + DistinctOn, EmptyRelation, Execute, Explain, Extension, FetchType, Filter, Join, JoinConstraint, JoinType, Limit, LogicalPlan, Partitioning, PlanType, Prepare, Projection, RecursiveQuery, Repartition, SkipType, Sort, StringifiedPlan, Subquery, SubqueryAlias, TableScan, ToStringifiedPlan, Union, Unnest, Values, Window, diff --git a/datafusion/expr/src/logical_plan/plan.rs b/datafusion/expr/src/logical_plan/plan.rs index 8ba2a44842bc..191a42e38e3a 100644 --- a/datafusion/expr/src/logical_plan/plan.rs +++ b/datafusion/expr/src/logical_plan/plan.rs @@ -266,6 +266,8 @@ pub enum LogicalPlan { /// Prepare a statement and find any bind parameters /// (e.g. `?`). This is used to implement SQL-prepared statements. Prepare(Prepare), + /// Execute a prepared statement. This is used to implement SQL 'EXECUTE'. + Execute(Execute), /// Data Manipulation Language (DML): Insert / Update / Delete Dml(DmlStatement), /// Data Definition Language (DDL): CREATE / DROP TABLES / VIEWS / SCHEMAS @@ -314,6 +316,7 @@ impl LogicalPlan { LogicalPlan::Subquery(Subquery { subquery, .. }) => subquery.schema(), LogicalPlan::SubqueryAlias(SubqueryAlias { schema, .. }) => schema, LogicalPlan::Prepare(Prepare { input, .. }) => input.schema(), + LogicalPlan::Execute(Execute { schema, .. }) => schema, LogicalPlan::Explain(explain) => &explain.schema, LogicalPlan::Analyze(analyze) => &analyze.schema, LogicalPlan::Extension(extension) => extension.node.schema(), @@ -457,6 +460,7 @@ impl LogicalPlan { | LogicalPlan::Statement { .. } | LogicalPlan::EmptyRelation { .. } | LogicalPlan::Values { .. } + | LogicalPlan::Execute { .. } | LogicalPlan::DescribeTable(_) => vec![], } } @@ -560,6 +564,7 @@ impl LogicalPlan { LogicalPlan::Subquery(_) => Ok(None), LogicalPlan::EmptyRelation(_) | LogicalPlan::Prepare(_) + | LogicalPlan::Execute(_) | LogicalPlan::Statement(_) | LogicalPlan::Values(_) | LogicalPlan::Explain(_) @@ -712,6 +717,7 @@ impl LogicalPlan { LogicalPlan::Analyze(_) => Ok(self), LogicalPlan::Explain(_) => Ok(self), LogicalPlan::Prepare(_) => Ok(self), + LogicalPlan::Execute(_) => Ok(self), LogicalPlan::TableScan(_) => Ok(self), LogicalPlan::EmptyRelation(_) => Ok(self), LogicalPlan::Statement(_) => Ok(self), @@ -1072,6 +1078,14 @@ impl LogicalPlan { input: Arc::new(input), })) } + LogicalPlan::Execute(Execute { name, schema, .. }) => { + self.assert_no_inputs(inputs)?; + Ok(LogicalPlan::Execute(Execute { + name: name.clone(), + schema: Arc::clone(schema), + parameters: expr, + })) + } LogicalPlan::TableScan(ts) => { self.assert_no_inputs(inputs)?; Ok(LogicalPlan::TableScan(TableScan { @@ -1330,6 +1344,7 @@ impl LogicalPlan { | LogicalPlan::Copy(_) | LogicalPlan::DescribeTable(_) | LogicalPlan::Prepare(_) + | LogicalPlan::Execute(_) | LogicalPlan::Statement(_) | LogicalPlan::Extension(_) => None, } @@ -1933,6 +1948,9 @@ impl LogicalPlan { }) => { write!(f, "Prepare: {name:?} {data_types:?} ") } + LogicalPlan::Execute(Execute { name, parameters, .. 
}) => { + write!(f, "Execute: {} params=[{}]", name, expr_vec_fmt!(parameters)) + } LogicalPlan::DescribeTable(DescribeTable { .. }) => { write!(f, "DescribeTable") } @@ -2599,6 +2617,27 @@ pub struct Prepare { pub input: Arc, } +/// Execute a prepared statement. +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct Execute { + /// The name of the prepared statement to execute + pub name: String, + /// The execute parameters + pub parameters: Vec, + /// Dummy schema + pub schema: DFSchemaRef, +} + +// Comparison excludes the `schema` field. +impl PartialOrd for Execute { + fn partial_cmp(&self, other: &Self) -> Option { + match self.name.partial_cmp(&other.name) { + Some(Ordering::Equal) => self.parameters.partial_cmp(&other.parameters), + cmp => cmp, + } + } +} + /// Describe the schema of table /// /// # Example output: diff --git a/datafusion/expr/src/logical_plan/tree_node.rs b/datafusion/expr/src/logical_plan/tree_node.rs index 0658f7029740..ff2c1ec1d58f 100644 --- a/datafusion/expr/src/logical_plan/tree_node.rs +++ b/datafusion/expr/src/logical_plan/tree_node.rs @@ -38,10 +38,10 @@ //! * [`LogicalPlan::expressions`]: Return a copy of the plan's expressions use crate::{ dml::CopyTo, Aggregate, Analyze, CreateMemoryTable, CreateView, DdlStatement, - Distinct, DistinctOn, DmlStatement, Explain, Expr, Extension, Filter, Join, Limit, - LogicalPlan, Partitioning, Prepare, Projection, RecursiveQuery, Repartition, Sort, - Subquery, SubqueryAlias, TableScan, Union, Unnest, UserDefinedLogicalNode, Values, - Window, + Distinct, DistinctOn, DmlStatement, Execute, Explain, Expr, Extension, Filter, Join, + Limit, LogicalPlan, Partitioning, Prepare, Projection, RecursiveQuery, Repartition, + Sort, Subquery, SubqueryAlias, TableScan, Union, Unnest, UserDefinedLogicalNode, + Values, Window, }; use std::ops::Deref; use std::sync::Arc; @@ -363,6 +363,7 @@ impl TreeNode for LogicalPlan { | LogicalPlan::Statement { .. } | LogicalPlan::EmptyRelation { .. } | LogicalPlan::Values { .. } + | LogicalPlan::Execute { .. } | LogicalPlan::DescribeTable(_) => Transformed::no(self), }) } @@ -505,6 +506,9 @@ impl LogicalPlan { .chain(fetch.iter()) .map(|e| e.deref()) .apply_until_stop(f), + LogicalPlan::Execute(Execute { parameters, .. }) => { + parameters.iter().apply_until_stop(f) + } // plans without expressions LogicalPlan::EmptyRelation(_) | LogicalPlan::RecursiveQuery(_) @@ -734,6 +738,20 @@ impl LogicalPlan { }) }) } + LogicalPlan::Execute(Execute { + parameters, + name, + schema, + }) => parameters + .into_iter() + .map_until_stop_and_collect(f)? + .update_data(|parameters| { + LogicalPlan::Execute(Execute { + parameters, + name, + schema, + }) + }), // plans without expressions LogicalPlan::EmptyRelation(_) | LogicalPlan::Unnest(_) diff --git a/datafusion/optimizer/src/common_subexpr_eliminate.rs b/datafusion/optimizer/src/common_subexpr_eliminate.rs index ee9ae9fb15a7..4fe22d252744 100644 --- a/datafusion/optimizer/src/common_subexpr_eliminate.rs +++ b/datafusion/optimizer/src/common_subexpr_eliminate.rs @@ -553,7 +553,8 @@ impl OptimizerRule for CommonSubexprEliminate { | LogicalPlan::Copy(_) | LogicalPlan::Unnest(_) | LogicalPlan::RecursiveQuery(_) - | LogicalPlan::Prepare(_) => { + | LogicalPlan::Prepare(_) + | LogicalPlan::Execute(_) => { // This rule handles recursion itself in a `ApplyOrder::TopDown` like // manner. plan.map_children(|c| self.rewrite(c, config))? 
diff --git a/datafusion/optimizer/src/optimize_projections/mod.rs b/datafusion/optimizer/src/optimize_projections/mod.rs index 94c04d6328ed..ec2225bbc042 100644 --- a/datafusion/optimizer/src/optimize_projections/mod.rs +++ b/datafusion/optimizer/src/optimize_projections/mod.rs @@ -348,7 +348,8 @@ fn optimize_projections( | LogicalPlan::RecursiveQuery(_) | LogicalPlan::Statement(_) | LogicalPlan::Values(_) - | LogicalPlan::DescribeTable(_) => { + | LogicalPlan::DescribeTable(_) + | LogicalPlan::Execute(_) => { // These operators have no inputs, so stop the optimization process. return Ok(Transformed::no(plan)); } diff --git a/datafusion/proto/src/logical_plan/mod.rs b/datafusion/proto/src/logical_plan/mod.rs index b90ae88aa74a..1993598f5cf7 100644 --- a/datafusion/proto/src/logical_plan/mod.rs +++ b/datafusion/proto/src/logical_plan/mod.rs @@ -1633,6 +1633,9 @@ impl AsLogicalPlan for LogicalPlanNode { LogicalPlan::RecursiveQuery(_) => Err(proto_error( "LogicalPlan serde is not yet implemented for RecursiveQuery", )), + LogicalPlan::Execute(_) => Err(proto_error( + "LogicalPlan serde is not yet implemented for Execute", + )), } } } diff --git a/datafusion/sql/src/statement.rs b/datafusion/sql/src/statement.rs index abb9912b712a..00949aa13ae1 100644 --- a/datafusion/sql/src/statement.rs +++ b/datafusion/sql/src/statement.rs @@ -48,7 +48,7 @@ use datafusion_expr::{ CreateExternalTable as PlanCreateExternalTable, CreateFunction, CreateFunctionBody, CreateIndex as PlanCreateIndex, CreateMemoryTable, CreateView, DescribeTable, DmlStatement, DropCatalogSchema, DropFunction, DropTable, DropView, EmptyRelation, - Explain, Expr, ExprSchemable, Filter, LogicalPlan, LogicalPlanBuilder, + Execute, Explain, Expr, ExprSchemable, Filter, LogicalPlan, LogicalPlanBuilder, OperateFunctionArg, PlanType, Prepare, SetVariable, SortExpr, Statement as PlanStatement, ToStringifiedPlan, TransactionAccessMode, TransactionConclusion, TransactionEnd, TransactionIsolationLevel, TransactionStart, @@ -642,6 +642,30 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { input: Arc::new(plan), })) } + Statement::Execute { + name, + parameters, + using, + } => { + // `USING` is a MySQL-specific syntax and currently not supported. 
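+                // e.g. MySQL binds session variables with `EXECUTE stmt USING @a, @b`;
+                // DataFusion's EXECUTE instead takes plain expressions, e.g. `EXECUTE my_plan(10*2 + 1, 'Foo')` (see prepare.slt below).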
+ if !using.is_empty() { + return not_impl_err!( + "Execute statement with USING is not supported" + ); + } + + let empty_schema = DFSchema::empty(); + let parameters = parameters + .into_iter() + .map(|expr| self.sql_to_expr(expr, &empty_schema, planner_context)) + .collect::>>()?; + + Ok(LogicalPlan::Execute(Execute { + name: ident_to_string(&name), + parameters, + schema: DFSchemaRef::new(empty_schema), + })) + } Statement::ShowTables { extended, diff --git a/datafusion/sql/src/unparser/plan.rs b/datafusion/sql/src/unparser/plan.rs index 6348aba49082..8167ddacffb4 100644 --- a/datafusion/sql/src/unparser/plan.rs +++ b/datafusion/sql/src/unparser/plan.rs @@ -112,6 +112,7 @@ impl Unparser<'_> { | LogicalPlan::Analyze(_) | LogicalPlan::Extension(_) | LogicalPlan::Prepare(_) + | LogicalPlan::Execute(_) | LogicalPlan::Ddl(_) | LogicalPlan::Copy(_) | LogicalPlan::DescribeTable(_) diff --git a/datafusion/sqllogictest/test_files/prepare.slt b/datafusion/sqllogictest/test_files/prepare.slt index ce4b7217f990..e306ec7767c7 100644 --- a/datafusion/sqllogictest/test_files/prepare.slt +++ b/datafusion/sqllogictest/test_files/prepare.slt @@ -80,3 +80,21 @@ PREPARE my_plan(INT, DOUBLE, DOUBLE, DOUBLE) AS SELECT id, SUM(age) FROM person statement error PREPARE my_plan(STRING, STRING) AS SELECT * FROM (VALUES(1, $1), (2, $2)) AS t (num, letter); + +# test creating logical plan for EXECUTE statements +query TT +EXPLAIN EXECUTE my_plan; +---- +logical_plan Execute: my_plan params=[] + +query TT +EXPLAIN EXECUTE my_plan(10*2 + 1, 'Foo'); +---- +logical_plan Execute: my_plan params=[Int64(21), Utf8("Foo")] + +query error DataFusion error: Schema error: No field named a\. +EXPLAIN EXECUTE my_plan(a); + +# TODO: support EXECUTE queries +query error DataFusion error: This feature is not implemented: Unsupported logical plan: Execute +EXECUTE my_plan; From 6c5823ec25a0fc14a49922b10a3d274e072c4bd6 Mon Sep 17 00:00:00 2001 From: Adrian Garcia Badaracco <1755071+adriangb@users.noreply.github.com> Date: Fri, 1 Nov 2024 10:23:23 -0500 Subject: [PATCH 20/32] Support `DictionaryArray` in `OVER` clause (#13153) * implement target type selection for range queries on dictionary data types Fixes #13151 * Update type_coercion.rs * Add test * query I? --- .../optimizer/src/analyzer/type_coercion.rs | 28 +++++++++++-------- .../sqllogictest/test_files/dictionary.slt | 6 ++++ 2 files changed, 22 insertions(+), 12 deletions(-) diff --git a/datafusion/optimizer/src/analyzer/type_coercion.rs b/datafusion/optimizer/src/analyzer/type_coercion.rs index 5d33b58a0241..9793c4c5490f 100644 --- a/datafusion/optimizer/src/analyzer/type_coercion.rs +++ b/datafusion/optimizer/src/analyzer/type_coercion.rs @@ -688,6 +688,21 @@ fn coerce_frame_bound( } } +fn extract_window_frame_target_type(col_type: &DataType) -> Result { + if col_type.is_numeric() + || is_utf8_or_large_utf8(col_type) + || matches!(col_type, DataType::Null) + { + Ok(col_type.clone()) + } else if is_datetime(col_type) { + Ok(DataType::Interval(IntervalUnit::MonthDayNano)) + } else if let DataType::Dictionary(_, value_type) = col_type { + extract_window_frame_target_type(value_type) + } else { + return internal_err!("Cannot run range queries on datatype: {col_type:?}"); + } +} + // Coerces the given `window_frame` to use appropriate natural types. // For example, ROWS and GROUPS frames use `UInt64` during calculations. 
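// RANGE frames are coerced to the ORDER BY column's target type (datetime columns map to
// `Interval(MonthDayNano)`); dictionary-encoded columns are first unwrapped to their value type
// via `extract_window_frame_target_type` above.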
fn coerce_window_frame( @@ -703,18 +718,7 @@ fn coerce_window_frame( .map(|s| s.expr.get_type(schema)) .transpose()?; if let Some(col_type) = current_types { - if col_type.is_numeric() - || is_utf8_or_large_utf8(&col_type) - || matches!(col_type, DataType::Null) - { - col_type - } else if is_datetime(&col_type) { - DataType::Interval(IntervalUnit::MonthDayNano) - } else { - return internal_err!( - "Cannot run range queries on datatype: {col_type:?}" - ); - } + extract_window_frame_target_type(&col_type)? } else { return internal_err!("ORDER BY column cannot be empty"); } diff --git a/datafusion/sqllogictest/test_files/dictionary.slt b/datafusion/sqllogictest/test_files/dictionary.slt index 176331f570b0..b6923fcc944d 100644 --- a/datafusion/sqllogictest/test_files/dictionary.slt +++ b/datafusion/sqllogictest/test_files/dictionary.slt @@ -444,3 +444,9 @@ physical_plan 01)CoalesceBatchesExec: target_batch_size=8192 02)--FilterExec: column2@1 = 1 03)----MemoryExec: partitions=1, partition_sizes=[1] + +# Window Functions +query I +select dense_rank() over (order by arrow_cast('abc', 'Dictionary(UInt16, Utf8)')); +---- +1 From 5e53b631dc2654a3c63ad80783745ca255ecfc94 Mon Sep 17 00:00:00 2001 From: Piotr Findeisen Date: Fri, 1 Nov 2024 17:23:28 +0100 Subject: [PATCH 21/32] Allow testing records with sibling whitespace in SLT tests and add more string tests (#13197) * empty * Allow testing values with trailing whitespace in SLT tests * Update SLT tests for "Allow testing values with trailing whitespace ..." * Add empty string to string test data --- datafusion/sqllogictest/bin/sqllogictests.rs | 23 ++- datafusion/sqllogictest/test_files/avro.slt | 32 ++--- datafusion/sqllogictest/test_files/map.slt | 4 +- .../test_files/pg_compat/pg_compat_types.slt | 6 +- datafusion/sqllogictest/test_files/select.slt | 6 +- .../test_files/string/dictionary_utf8.slt | 2 + .../test_files/string/init_data.slt.part | 2 + .../test_files/string/large_string.slt | 4 + .../sqllogictest/test_files/string/string.slt | 12 ++ .../test_files/string/string_literal.slt | 5 + .../test_files/string/string_query.slt.part | 131 ++++++++++++++++-- .../test_files/string/string_view.slt | 12 ++ datafusion/sqllogictest/test_files/window.slt | 72 +++++----- 13 files changed, 239 insertions(+), 72 deletions(-) diff --git a/datafusion/sqllogictest/bin/sqllogictests.rs b/datafusion/sqllogictest/bin/sqllogictests.rs index 2479252a7b5b..c3e739d146c6 100644 --- a/datafusion/sqllogictest/bin/sqllogictests.rs +++ b/datafusion/sqllogictest/bin/sqllogictests.rs @@ -22,6 +22,7 @@ use std::path::{Path, PathBuf}; use clap::Parser; use datafusion_sqllogictest::{DataFusion, TestContext}; use futures::stream::StreamExt; +use itertools::Itertools; use log::info; use sqllogictest::strict_column_validator; @@ -39,6 +40,23 @@ pub fn main() -> Result<()> { .block_on(run_tests()) } +fn value_validator(actual: &[Vec], expected: &[String]) -> bool { + let expected = expected + .iter() + // Trailing whitespace from lines in SLT will typically be removed, but do not fail if it is not + // If particular test wants to cover trailing whitespace on a value, + // it should project additional non-whitespace column on the right. 
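+    // e.g. `select ' ', '|'` in string_literal.slt covers a whitespace-only value by projecting `'|'` to its right.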
+ .map(|s| s.trim_end().to_owned()) + .collect::>(); + let actual = actual + .iter() + .map(|strs| strs.iter().join(" ")) + // Editors do not preserve trailing whitespace, so expected may or may not lack it included + .map(|s| s.trim_end().to_owned()) + .collect::>(); + actual == expected +} + /// Sets up an empty directory at test_files/scratch/ /// creating it if needed and clearing any file contents if it exists /// This allows tests for inserting to external tables or copy to @@ -140,6 +158,7 @@ async fn run_test_file(test_file: TestFile) -> Result<()> { )) }); runner.with_column_validator(strict_column_validator); + runner.with_validator(value_validator); runner .run_file_async(path) .await @@ -158,6 +177,7 @@ async fn run_test_file_with_postgres(test_file: TestFile) -> Result<()> { let mut runner = sqllogictest::Runner::new(|| Postgres::connect(relative_path.clone())); runner.with_column_validator(strict_column_validator); + runner.with_validator(value_validator); runner .run_file_async(path) .await @@ -176,7 +196,6 @@ async fn run_complete_file(test_file: TestFile) -> Result<()> { path, relative_path, } = test_file; - use sqllogictest::default_validator; info!("Using complete mode to complete: {}", path.display()); @@ -196,7 +215,7 @@ async fn run_complete_file(test_file: TestFile) -> Result<()> { .update_test_file( path, col_separator, - default_validator, + value_validator, strict_column_validator, ) .await diff --git a/datafusion/sqllogictest/test_files/avro.slt b/datafusion/sqllogictest/test_files/avro.slt index f8ef81a8ba2b..8282331f995e 100644 --- a/datafusion/sqllogictest/test_files/avro.slt +++ b/datafusion/sqllogictest/test_files/avro.slt @@ -198,22 +198,22 @@ NULL query IT SELECT id, CAST(string_col AS varchar) FROM alltypes_plain_multi_files ---- -4 0 -5 1 -6 0 -7 1 -2 0 -3 1 -0 0 -1 1 -4 0 -5 1 -6 0 -7 1 -2 0 -3 1 -0 0 -1 1 +4 0 +5 1 +6 0 +7 1 +2 0 +3 1 +0 0 +1 1 +4 0 +5 1 +6 0 +7 1 +2 0 +3 1 +0 0 +1 1 # test avro nested records query ???? diff --git a/datafusion/sqllogictest/test_files/map.slt b/datafusion/sqllogictest/test_files/map.slt index ed4f999aa16c..10ca3ae881bf 100644 --- a/datafusion/sqllogictest/test_files/map.slt +++ b/datafusion/sqllogictest/test_files/map.slt @@ -433,7 +433,7 @@ SELECT MAP {'a': 1, null: 2} query ? SELECT MAP {[1,2,3]:1, [2,4]:2}; ---- - {[1, 2, 3]: 1, [2, 4]: 2} +{[1, 2, 3]: 1, [2, 4]: 2} # array with different type as key # expect to fail due to type coercion error @@ -483,7 +483,7 @@ SELECT MAP { MAP {1:'a', 2:'b'}:1, MAP {1:'c', 2:'d'}:2 }; query ? SELECT MAP { MAP {1:'a', 2:'b', 3:'c'}:1, MAP {2:'c', 4:'d'}:2 }; ---- - {{1: a, 2: b, 3: c}: 1, {2: c, 4: d}: 2} +{{1: a, 2: b, 3: c}: 1, {2: c, 4: d}: 2} # map as value query ? 
diff --git a/datafusion/sqllogictest/test_files/pg_compat/pg_compat_types.slt b/datafusion/sqllogictest/test_files/pg_compat/pg_compat_types.slt index b7497429fa95..7e315a448b48 100644 --- a/datafusion/sqllogictest/test_files/pg_compat/pg_compat_types.slt +++ b/datafusion/sqllogictest/test_files/pg_compat/pg_compat_types.slt @@ -24,18 +24,18 @@ NULL query TT select 'a'::VARCHAR, ''::VARCHAR ---- -a (empty) +a (empty) skipif postgres query TT select 'a'::CHAR, ''::CHAR ---- -a (empty) +a (empty) query TT select 'a'::TEXT, ''::TEXT ---- -a (empty) +a (empty) skipif postgres query I diff --git a/datafusion/sqllogictest/test_files/select.slt b/datafusion/sqllogictest/test_files/select.slt index f2ab4135aaa7..d7ff51011b65 100644 --- a/datafusion/sqllogictest/test_files/select.slt +++ b/datafusion/sqllogictest/test_files/select.slt @@ -459,7 +459,7 @@ VALUES (-1) query IIB VALUES (2+1,2-1,2>1) ---- -3 1 true +3 1 true # multiple rows values query I rowsort @@ -472,8 +472,8 @@ VALUES (1),(2) query IT rowsort VALUES (1,'a'),(2,'b') ---- -1 a -2 b +1 a +2 b # table foo for distinct order by statement ok diff --git a/datafusion/sqllogictest/test_files/string/dictionary_utf8.slt b/datafusion/sqllogictest/test_files/string/dictionary_utf8.slt index 8a2916f669d0..c43f3a4cc16b 100644 --- a/datafusion/sqllogictest/test_files/string/dictionary_utf8.slt +++ b/datafusion/sqllogictest/test_files/string/dictionary_utf8.slt @@ -58,6 +58,8 @@ Xiangpeng datafusion数据融合 false true false true Raphael datafusionДатаФусион false false false false under_score un iść core false false false false percent pan Tadeusz ma iść w kąt false false false false +(empty) (empty) false false false false +NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL # diff --git a/datafusion/sqllogictest/test_files/string/init_data.slt.part b/datafusion/sqllogictest/test_files/string/init_data.slt.part index 18cd022f7882..e3914ea49855 100644 --- a/datafusion/sqllogictest/test_files/string/init_data.slt.part +++ b/datafusion/sqllogictest/test_files/string/init_data.slt.part @@ -22,6 +22,8 @@ create table test_source as values ('Raphael', 'R', 'datafusionДатаФусион', 'аФус'), ('under_score', 'un_____core', 'un iść core', 'chrząszcz na łące w 東京都'), ('percent', 'p%t', 'pan Tadeusz ma iść w kąt', 'Pan Tadeusz ma frunąć stąd w kąt'), + ('', '%', '', ''), + (NULL, '%', NULL, NULL), (NULL, 'R', NULL, '🔥'); # -------------------------------------- diff --git a/datafusion/sqllogictest/test_files/string/large_string.slt b/datafusion/sqllogictest/test_files/string/large_string.slt index 2063eae0f8ed..1cf906d7dc75 100644 --- a/datafusion/sqllogictest/test_files/string/large_string.slt +++ b/datafusion/sqllogictest/test_files/string/large_string.slt @@ -43,6 +43,8 @@ Xiangpeng Xiangpeng datafusion数据融合 datafusion数据融合 Raphael R datafusionДатаФусион аФус under_score un_____core un iść core chrząszcz na łące w 東京都 percent p%t pan Tadeusz ma iść w kąt Pan Tadeusz ma frunąć stąd w kąt +(empty) % (empty) (empty) +NULL % NULL NULL NULL R NULL 🔥 # TODO: move it back to `string_query.slt.part` after fixing the issue @@ -61,6 +63,8 @@ Xiangpeng datafusion数据融合 false true false true Raphael datafusionДатаФусион false false false false under_score un iść core false false false false percent pan Tadeusz ma iść w kąt false false false false +(empty) (empty) false false false false +NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL # diff --git a/datafusion/sqllogictest/test_files/string/string.slt b/datafusion/sqllogictest/test_files/string/string.slt 
index 62b0cae5f665..9e97712b6871 100644 --- a/datafusion/sqllogictest/test_files/string/string.slt +++ b/datafusion/sqllogictest/test_files/string/string.slt @@ -50,6 +50,8 @@ Xiangpeng datafusion数据融合 false true false true Raphael datafusionДатаФусион false false false false under_score un iść core false false false false percent pan Tadeusz ma iść w kąt false false false false +(empty) (empty) false false false false +NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL # @@ -72,6 +74,9 @@ SELECT unicode_2, 'is LIKE', ascii_2 FROM test_basic_operator WHERE unicode_2 LI UNION ALL SELECT unicode_2, 'is NOT LIKE', ascii_2 FROM test_basic_operator WHERE unicode_2 NOT LIKE ascii_2 ---- +(empty) is LIKE % +(empty) is LIKE % +(empty) is LIKE % Andrew is NOT LIKE X Pan Tadeusz ma frunąć stąd w kąt is NOT LIKE p%t Raphael is NOT LIKE R @@ -100,7 +105,9 @@ SELECT (unicode_2 LIKE ascii_2) AS unicode_2_like_ascii_2 FROM test_basic_operator ---- +(empty) % (empty) (empty) true false true true Andrew X datafusion📊🔥 🔥 false false false false +NULL % NULL NULL NULL NULL NULL NULL NULL R NULL 🔥 NULL NULL NULL false Raphael R datafusionДатаФусион аФус false false false false Xiangpeng Xiangpeng datafusion数据融合 datafusion数据融合 true true false false @@ -122,6 +129,9 @@ SELECT unicode_2, 'is ILIKE', ascii_2 FROM test_basic_operator WHERE unicode_2 I UNION ALL SELECT unicode_2, 'is NOT ILIKE', ascii_2 FROM test_basic_operator WHERE unicode_2 NOT ILIKE ascii_2 ---- +(empty) is ILIKE % +(empty) is ILIKE % +(empty) is ILIKE % Andrew is NOT ILIKE X Pan Tadeusz ma frunąć stąd w kąt is ILIKE p%t Raphael is NOT ILIKE R @@ -150,7 +160,9 @@ SELECT (unicode_2 ILIKE ascii_2) AS unicode_2_ilike_ascii_2 FROM test_basic_operator ---- +(empty) % (empty) (empty) true false true true Andrew X datafusion📊🔥 🔥 false false false false +NULL % NULL NULL NULL NULL NULL NULL NULL R NULL 🔥 NULL NULL NULL false Raphael R datafusionДатаФусион аФус false false false false Xiangpeng Xiangpeng datafusion数据融合 datafusion数据融合 true true false false diff --git a/datafusion/sqllogictest/test_files/string/string_literal.slt b/datafusion/sqllogictest/test_files/string/string_literal.slt index 5d847747693d..80bd7fc59c00 100644 --- a/datafusion/sqllogictest/test_files/string/string_literal.slt +++ b/datafusion/sqllogictest/test_files/string/string_literal.slt @@ -816,3 +816,8 @@ query B SELECT starts_with('foobar', 'bar') ---- false + +query TT +select ' ', '|' +---- + | diff --git a/datafusion/sqllogictest/test_files/string/string_query.slt.part b/datafusion/sqllogictest/test_files/string/string_query.slt.part index 24ac379ca5a0..c4975b5b8c8d 100644 --- a/datafusion/sqllogictest/test_files/string/string_query.slt.part +++ b/datafusion/sqllogictest/test_files/string/string_query.slt.part @@ -28,6 +28,8 @@ Xiangpeng Xiangpeng datafusion数据融合 datafusion数据融合 Raphael R datafusionДатаФусион аФус under_score un_____core un iść core chrząszcz na łące w 東京都 percent p%t pan Tadeusz ma iść w kąt Pan Tadeusz ma frunąć stąd w kąt +(empty) % (empty) (empty) +NULL % NULL NULL NULL R NULL 🔥 # -------------------------------------- @@ -46,11 +48,13 @@ Andrew X Raphael R under_score un_____core percent p%t +(empty) % query TT select unicode_1, unicode_2 from test_basic_operator where unicode_1 = unicode_2 ---- datafusion数据融合 datafusion数据融合 +(empty) (empty) query TT select unicode_1, unicode_2 from test_basic_operator where unicode_1 <> unicode_2 @@ -63,6 +67,7 @@ pan Tadeusz ma iść w kąt Pan Tadeusz ma frunąć stąd w kąt query TT select ascii_1, unicode_1 from 
test_basic_operator where ascii_1 = unicode_1 ---- +(empty) (empty) query TT select ascii_1, unicode_1 from test_basic_operator where ascii_1 <> unicode_1 @@ -92,6 +97,8 @@ Xiangpeng Xiangpeng datafusion数据融合 datafusion数据融合 true false tru Raphael R datafusionДатаФусион аФус false true false true false true under_score un_____core un iść core chrząszcz na łące w 東京都 false true false true false true percent p%t pan Tadeusz ma iść w kąt Pan Tadeusz ma frunąć stąd w kąt false true false true false true +(empty) % (empty) (empty) false true true false true false +NULL % NULL NULL NULL NULL NULL NULL NULL NULL NULL R NULL 🔥 NULL NULL NULL NULL NULL NULL # -------------------------------------- @@ -111,6 +118,8 @@ Xiangpeng datafusion数据融合 false true true false Raphael datafusionДатаФусион false true false true under_score un iść core false true false true percent pan Tadeusz ma iść w kąt false true false true +(empty) (empty) false true false true +NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL # -------------------------------------- @@ -130,6 +139,8 @@ Xiangpeng datafusion数据融合 false true true false Raphael datafusionДатаФусион false true false true under_score un iść core false true false true percent pan Tadeusz ma iść w kąt false true false true +(empty) (empty) false true false true +NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL # -------------------------------------- @@ -149,6 +160,8 @@ Xiangpeng datafusion数据融合 false true true false Raphael datafusionДатаФусион false true false true under_score un iść core false true false true percent pan Tadeusz ma iść w kąt false true false true +(empty) (empty) false true false true +NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL # -------------------------------------- @@ -193,6 +206,8 @@ Xia Xia dat dat Rap R dat аФу und un_ un chr per p%t pan Pan +(empty) % (empty) (empty) +NULL % NULL NULL NULL R NULL 🔥 # -------------------------------------- @@ -205,7 +220,7 @@ SELECT FROM test_basic_operator ---- -5 5 +6 6 query II SELECT @@ -220,8 +235,9 @@ GROUP BY ascii_2; 1 1 1 1 1 1 +1 1 -query II +query II rowsort SELECT COUNT(DISTINCT ascii_1), COUNT(DISTINCT unicode_1) @@ -229,6 +245,8 @@ FROM test_basic_operator GROUP BY unicode_2; ---- +0 0 +1 1 1 1 1 1 1 1 @@ -252,6 +270,8 @@ true true false false true false false false false false false false false false false false +false true true false +NULL NULL NULL NULL NULL NULL NULL NULL query BBBB @@ -267,6 +287,8 @@ false false true true false false true false false false false false false false false false +false false false false +NULL false NULL NULL NULL false NULL false # -------------------------------------- @@ -283,6 +305,8 @@ Xiangpeng Raphael under_scrre percent +(empty) +NULL NULL query T @@ -295,6 +319,8 @@ databusirn数据融合 databusirnДатаФусион un iść crre pan Tadeusz ma iść w kąt +(empty) +NULL NULL # -------------------------------------- @@ -312,6 +338,8 @@ Xiangpfng Raphafl undfr_score pfrcent +(empty) +NULL NULL # Should run REGEXP_REPLACE with Scalar value for string with flag @@ -325,6 +353,8 @@ Xiangpfng Raphafl undfr_score pfrcent +(empty) +NULL NULL # Should run REGEXP_REPLACE with ScalarArray value for string @@ -338,6 +368,8 @@ Xiangpeng Raphael bar bar +bar +NULL NULL # Should run REGEXP_REPLACE with ScalarArray value for string with flag @@ -351,6 +383,8 @@ Xiangpeng Raphael bar bar +bar +NULL NULL # -------------------------------------- @@ -373,6 +407,8 @@ Xiangpeng Datafusion数据融合 Raphael Datafusionдатафусион Under_Score Un Iść Core Percent 
Pan Tadeusz Ma Iść W KąT +(empty) (empty) +NULL NULL NULL NULL statement ok @@ -395,6 +431,8 @@ FROM test_basic_operator; 82 82 100 1072 117 117 117 99 112 112 112 80 +0 37 0 0 +NULL 37 NULL NULL NULL 82 NULL 128293 # -------------------------------------- @@ -417,6 +455,8 @@ Xiangpeng Xiangpeng NULL datafusion数据融合 datafusion数据融合 NULL Raphael Raphael NULL datafusionДатаФусион datafusionДатаФусион NULL under_score under_score NULL un iść core un iść core NULL percent percent NULL pan Tadeusz ma iść w kąt pan Tadeusz ma iść w kąt NULL +(empty) (empty) NULL (empty) (empty) NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL # -------------------------------------- @@ -439,6 +479,8 @@ Xiangpeng (empty) NULL datafusion数据融合 NULL datafusion数据融合 Raphael aphael NULL datafusionДатаФусион NULL datafusionДатаФусион under_score der_score NULL un iść core NULL un iść core percent ercent NULL pan Tadeusz ma iść w kąt NULL pan Tadeusz ma iść w kąt +(empty) (empty) NULL (empty) NULL (empty) +NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL # -------------------------------------- @@ -460,6 +502,8 @@ Xiangpeng (empty) Xiangpeng NULL datafusion数据融合 Raphael Raphael Raphael NULL datafusionДатаФусион under_sco under_s under_score NULL un iść core percent percen percent NULL pan Tadeusz ma iść w kąt +(empty) (empty) (empty) NULL (empty) +NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL # -------------------------------------- @@ -481,6 +525,8 @@ false true NULL true NULL false false true NULL true NULL false false false NULL false NULL false false false NULL false NULL false +false false NULL true NULL false +NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL # -------------------------------------- @@ -495,6 +541,8 @@ xiangpeng datafusion数据融合 raphael datafusionдатафусион under_score un iść core percent pan tadeusz ma iść w kąt +(empty) (empty) +NULL NULL NULL NULL # -------------------------------------- @@ -509,6 +557,8 @@ XIANGPENG DATAFUSION数据融合 RAPHAEL DATAFUSIONДАТАФУСИОН UNDER_SCORE UN IŚĆ CORE PERCENT PAN TADEUSZ MA IŚĆ W KĄT +(empty) (empty) +NULL NULL NULL NULL # -------------------------------------- @@ -536,6 +586,8 @@ Xiangpeng:Data XiangpengXiangpeng Xiangpeng Xiangpengdatafusion数据融合 Xian Raphael:Data RaphaelR Raphael RaphaeldatafusionДатаФусион RaphaelаФус datafusionДатаФусионRaphael datafusionДатаФусионаФус datafusionДатаФусион datafusionДатаФусион🔥 🔥 (empty) Raphael,datafusionДатаФусион under_score:Data under_scoreun_____core under_score under_scoreun iść core under_scorechrząszcz na łące w 東京都 un iść coreunder_score un iść corechrząszcz na łące w 東京都 un iść core un iść core🔥 🔥 (empty) under_score,un iść core percent:Data percentp%t percent percentpan Tadeusz ma iść w kąt percentPan Tadeusz ma frunąć stąd w kąt pan Tadeusz ma iść w kątpercent pan Tadeusz ma iść w kątPan Tadeusz ma frunąć stąd w kąt pan Tadeusz ma iść w kąt pan Tadeusz ma iść w kąt🔥 🔥 (empty) percent,pan Tadeusz ma iść w kąt +:Data % (empty) (empty) (empty) (empty) (empty) (empty) 🔥 🔥 (empty) , +:Data % (empty) (empty) (empty) (empty) (empty) (empty) 🔥 🔥 (empty) , :Data R (empty) (empty) 🔥 (empty) 🔥 (empty) 🔥 🔥 (empty) , # -------------------------------------- @@ -557,6 +609,8 @@ Xfoogpeng dfoofusion数据融合 X🔥angpeng d🔥tafusion数据融合 NULL NUL Rfooael dfoofusionДатаФусион R🔥phael d🔥tafusionДатаФусион NULL NULL ufoor_score ufoość core u🔥der_score u🔥 iść core NULL NULL pfooent pfooTadeusz ma iść w kąt p🔥rcent p🔥n Tadeusz ma iść w kąt NULL NULL +foo foo 🔥 🔥 NULL NULL +NULL NULL NULL NULL NULL NULL NULL 
NULL NULL NULL NULL NULL # -------------------------------------- @@ -578,6 +632,8 @@ Xiangpeng bar NULL bar NULL datafusion数据融合 Raphael baraphael NULL datafusionДатbarион NULL datafusionДатаФусион under_score under_score NULL un iść core NULL un iść core percent percent NULL pan Tadeusz ma iść w kąt NULL pan Tadeusz ma iść w kąt +(empty) (empty) NULL bar NULL (empty) +NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL # -------------------------------------- @@ -599,6 +655,8 @@ eng (empty) ngpeng 据融合 (empty) afusion数据融合 ael (empty) hael ион (empty) afusionДатаФусион ore (empty) er_score ore (empty) iść core ent (empty) cent kąt (empty) Tadeusz ma iść w kąt +(empty) (empty) (empty) (empty) (empty) (empty) +NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL # -------------------------------------- @@ -621,6 +679,8 @@ Xia (empty) Xiangp dat (empty) datafusion数 Rap (empty) Raph dat (empty) datafusionДатаФус und (empty) under_sc un (empty) un iść c per (empty) perc pan (empty) pan Tadeusz ma iść w +(empty) (empty) (empty) (empty) (empty) (empty) +NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL # -------------------------------------- @@ -640,6 +700,8 @@ Xi Xiangpeng datafusion数据融合 datafusion数据融合 R Raph datafusionД datafusionДат under_score under_score un iść core un iść core percent percent pan Tadeusz ma iść w kąt pan Tadeusz ma iść w kąt +(empty) (empty) (empty) (empty) +NULL NULL NULL NULL NULL NULL NULL NULL # -------------------------------------- @@ -659,6 +721,8 @@ FROM test_basic_operator; 0 3 0 3 0 0 0 0 0 0 0 0 +0 0 0 0 +NULL NULL NULL NULL NULL NULL NULL NULL # -------------------------------------- @@ -680,6 +744,8 @@ Xiangpengfoo Xiangpeng🔥 datafusion数据融合foo datafusion数据融合🔥 Raphaelfoo Raphael🔥 datafusionДатаФусионfoo datafusionДатаФусион🔥 under_scorefoo under_score🔥 un iść corefoo un iść core🔥 percentfoo percent🔥 pan Tadeusz ma iść w kątfoo pan Tadeusz ma iść w kąt🔥 +foo 🔥 foo 🔥 +NULL NULL NULL NULL NULL NULL NULL NULL # || same type (column1 has null, so also tests NULL || NULL) @@ -697,6 +763,8 @@ XiangpengXiangpeng Xiangpengdatafusion数据融合 datafusion数据融合Xiangpe RaphaelR RaphaelаФус datafusionДатаФусионR datafusionДатаФусионаФус under_scoreun_____core under_scorechrząszcz na łące w 東京都 un iść coreun_____core un iść corechrząszcz na łące w 東京都 percentp%t percentPan Tadeusz ma frunąć stąd w kąt pan Tadeusz ma iść w kątp%t pan Tadeusz ma iść w kątPan Tadeusz ma frunąć stąd w kąt +% (empty) % (empty) +NULL NULL NULL NULL NULL NULL NULL NULL # -------------------------------------- @@ -714,6 +782,8 @@ true false false true false false false false +false false +NULL NULL NULL NULL query BB @@ -727,6 +797,8 @@ false false false true false false false false +false false +NULL NULL NULL NULL query BB @@ -740,6 +812,8 @@ true false true true true true true true +true true +NULL NULL NULL NULL query BB @@ -753,6 +827,8 @@ false false true true true true true true +true true +NULL NULL NULL NULL # -------------------------------------- @@ -773,6 +849,8 @@ Xiangpeng nice Xiangpeng and Xiangpeng datafusion数据融合 cool datafusion数 Raphael nice Raphael and R datafusionДатаФусион cool datafusionДатаФусион and аФус Raphael 🔥 datafusionДатаФусион under_score nice under_score and un_____core un iść core cool un iść core and chrząszcz na łące w 東京都 under_score 🔥 un iść core percent nice percent and p%t pan Tadeusz ma iść w kąt cool pan Tadeusz ma iść w kąt and Pan Tadeusz ma frunąć stąd w kąt percent 🔥 pan Tadeusz ma iść w kąt + nice and % cool and 🔥 +NULL NULL NULL NULL 
NULL NULL NULL NULL NULL NULL # -------------------------------------- @@ -809,6 +887,8 @@ Xiangpeng datafusion数据融合 false false false false Raphael datafusionДатаФусион false false false false under_score un iść core false false false false percent pan Tadeusz ma iść w kąt false false false false +(empty) (empty) false false false false +NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL # -------------------------------------- @@ -827,6 +907,8 @@ FROM 7 20 11 11 7 24 +0 0 +NULL NULL NULL NULL # -------------------------------------- @@ -846,6 +928,8 @@ false true NULL NULL false true NULL NULL false false NULL NULL false false NULL NULL +false false NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL # -------------------------------------- @@ -865,6 +949,8 @@ false false NULL NULL false true NULL NULL false false NULL NULL false false NULL NULL +false false NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL # -------------------------------------- @@ -884,6 +970,8 @@ FROM test_basic_operator; 6 10 NULL NULL 8 13 NULL NULL 6 19 NULL NULL +6 14 NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL # -------------------------------------- @@ -903,20 +991,25 @@ xxxxxxxxxxxXiangpeng NULL 🔥🔥🔥🔥🔥🔥datafusion数据融合 NULL xxxxxxxxxxxxxRaphael NULL datafusionДатаФусион NULL xxxxxxxxxunder_score NULL 🔥🔥🔥🔥🔥🔥🔥🔥🔥un iść core NULL xxxxxxxxxxxxxpercent NULL pan Tadeusz ma iść w NULL +xxxxxxxxxxxxxxxxxxxx NULL 🔥🔥🔥🔥🔥🔥🔥🔥🔥🔥🔥🔥🔥🔥🔥🔥🔥🔥🔥🔥 NULL +NULL NULL NULL NULL NULL NULL NULL NULL -query TT +query TTT SELECT LPAD(ascii_1, 20), - LPAD(unicode_1, 20) + LPAD(unicode_1, 20), + '|' FROM test_basic_operator; ---- - Andrew datafusion📊🔥 - Xiangpeng datafusion数据融合 - Raphael datafusionДатаФусион - under_score un iść core - percent pan Tadeusz ma iść w -NULL NULL + Andrew datafusion📊🔥 | + Xiangpeng datafusion数据融合 | + Raphael datafusionДатаФусион | + under_score un iść core | + percent pan Tadeusz ma iść w | + | +NULL NULL | +NULL NULL | # -------------------------------------- # Test RPAD @@ -935,6 +1028,8 @@ Xiangpengxxxxxxxxxxx NULL datafusion数据融合🔥🔥🔥🔥🔥🔥 NULL Raphaelxxxxxxxxxxxxx NULL datafusionДатаФусион NULL under_scorexxxxxxxxx NULL un iść core🔥🔥🔥🔥🔥🔥🔥🔥🔥 NULL percentxxxxxxxxxxxxx NULL pan Tadeusz ma iść w NULL +xxxxxxxxxxxxxxxxxxxx NULL 🔥🔥🔥🔥🔥🔥🔥🔥🔥🔥🔥🔥🔥🔥🔥🔥🔥🔥🔥🔥 NULL +NULL NULL NULL NULL NULL NULL NULL NULL query TT @@ -948,6 +1043,8 @@ Xiangpeng datafusion数据融合 Raphael datafusionДатаФусион under_score un iść core percent pan Tadeusz ma iść w + +NULL NULL NULL NULL # -------------------------------------- @@ -973,6 +1070,8 @@ true false NULL NULL true false NULL NULL false true NULL NULL false true NULL NULL false false NULL NULL false false NULL NULL false false NULL NULL false false NULL NULL +false false NULL NULL false false NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL # -------------------------------------- @@ -999,6 +1098,8 @@ NULL NULL NULL NULL NULL [таФ] NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL # -------------------------------------- # Test REPEAT @@ -1015,6 +1116,8 @@ XiangpengXiangpengXiangpeng datafusion数据融合datafusion数据融合datafusi RaphaelRaphaelRaphael datafusionДатаФусионdatafusionДатаФусионdatafusionДатаФусион under_scoreunder_scoreunder_score un iść coreun iść coreun iść core percentpercentpercent pan Tadeusz ma iść w kątpan Tadeusz ma iść w kątpan Tadeusz ma iść w kąt +(empty) (empty) +NULL 
NULL NULL NULL # -------------------------------------- @@ -1036,6 +1139,8 @@ Xiangp ng NULL datafusion数据融合 (empty) NULL Rapha l NULL datafusionДатаФус он NULL und r_scor NULL un iść core (empty) NULL p rc NULL pan Tadeusz ma iść w kąt (empty) NULL +(empty) (empty) NULL (empty) (empty) NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL # -------------------------------------- @@ -1053,6 +1158,8 @@ gnepgnaiX 合融据数noisufatad leahpaR ноисуФатаДnoisufatad erocs_rednu eroc ćśi nu tnecrep tąk w ćśi am zsuedaT nap +(empty) (empty) +NULL NULL NULL NULL # -------------------------------------- @@ -1074,6 +1181,8 @@ FROM test_basic_operator; 6 0 NULL 18 18 NULL 4 0 NULL 0 0 NULL 2 0 NULL 0 0 NULL +0 0 NULL 0 0 NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL # -------------------------------------- @@ -1095,4 +1204,6 @@ Xiangp Xi NULL datafusion数据融合 datafusion数 NULL Rapha Raphael NULL datafusionДатаФус datafusionДатаФусион NULL und under_score NULL un iść core un iść core NULL p percent NULL pan Tadeusz ma iść w kąt pan Tadeusz ma iść w kąt NULL +(empty) (empty) NULL (empty) (empty) NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL diff --git a/datafusion/sqllogictest/test_files/string/string_view.slt b/datafusion/sqllogictest/test_files/string/string_view.slt index 997dca719147..43b08cb25f3f 100644 --- a/datafusion/sqllogictest/test_files/string/string_view.slt +++ b/datafusion/sqllogictest/test_files/string/string_view.slt @@ -60,6 +60,7 @@ create table test_source as values ('Andrew', 'X'), ('Xiangpeng', 'Xiangpeng'), ('Raphael', 'R'), + ('', 'Warsaw'), (NULL, 'R'); # Table with the different combination of column types @@ -89,6 +90,7 @@ select octet_length(column1_utf8view) from test; 6 9 7 +0 NULL query error DataFusion error: Arrow error: Compute error: bit_length not supported for Utf8View @@ -100,6 +102,7 @@ select btrim(column1_large_utf8) from test; Andrew Xiangpeng Raphael +(empty) NULL ######## @@ -119,6 +122,7 @@ from test; Andrew X false false true true Xiangpeng Xiangpeng true true false false Raphael R false false true true +(empty) Warsaw false false true true NULL R NULL NULL NULL NULL # test StringViewArray with LargeUtf8 columns @@ -134,6 +138,7 @@ from test; Andrew X false false true true Xiangpeng Xiangpeng true true false false Raphael R false false true true +(empty) Warsaw false false true true NULL R NULL NULL NULL NULL ######## @@ -153,6 +158,7 @@ from test; Andrew X false false true true Xiangpeng Xiangpeng true true false false Raphael R false false true true +(empty) Warsaw false false true true NULL R NULL NULL NULL NULL # StringView column to Dict scalar @@ -168,6 +174,7 @@ from test; Andrew X true true false false Xiangpeng Xiangpeng false false true true Raphael R false false true true +(empty) Warsaw false false true true NULL R NULL NULL NULL NULL # Dict column to StringView scalar @@ -183,6 +190,7 @@ from test; Andrew X true true false false Xiangpeng Xiangpeng false false true true Raphael R false false true true +(empty) Warsaw false false true true NULL R NULL NULL NULL NULL ######## @@ -296,6 +304,7 @@ FROM test; false false false true true true true true true +false false false NULL NULL NULL # Test STARTS_WITH with utf8 against utf8view, utf8, and largeutf8 @@ -322,6 +331,7 @@ FROM test; false false false true true true true true true +false false false NULL NULL NULL @@ -702,6 +712,7 @@ FROM test; AndrewX XiangpengXiangpeng RaphaelR +Warsaw R ## Should run CONCAT successfully with utf8 utf8view and largeutf8 
@@ -713,6 +724,7 @@ FROM test; AndrewXX XiangpengXiangpengXiangpeng RaphaelRR +WarsawWarsaw RR ## Ensure no casts for REGEXP_LIKE diff --git a/datafusion/sqllogictest/test_files/window.slt b/datafusion/sqllogictest/test_files/window.slt index 4a2d9e1d6864..29ff62ab34f9 100644 --- a/datafusion/sqllogictest/test_files/window.slt +++ b/datafusion/sqllogictest/test_files/window.slt @@ -3679,14 +3679,14 @@ SELECT FROM score_board s ORDER BY team_name, score; ---- -Mongrels Apu 350 1 -Mongrels Ned 666 1 -Mongrels Meg 1030 2 -Mongrels Burns 1270 2 -Simpsons Homer 1 1 -Simpsons Lisa 710 1 -Simpsons Marge 990 2 -Simpsons Bart 2010 2 +Mongrels Apu 350 1 +Mongrels Ned 666 1 +Mongrels Meg 1030 2 +Mongrels Burns 1270 2 +Simpsons Homer 1 1 +Simpsons Lisa 710 1 +Simpsons Marge 990 2 +Simpsons Bart 2010 2 query TTII SELECT @@ -3697,14 +3697,14 @@ SELECT FROM score_board s ORDER BY score; ---- -Simpsons Homer 1 1 -Mongrels Apu 350 1 -Mongrels Ned 666 1 -Simpsons Lisa 710 1 -Simpsons Marge 990 2 -Mongrels Meg 1030 2 -Mongrels Burns 1270 2 -Simpsons Bart 2010 2 +Simpsons Homer 1 1 +Mongrels Apu 350 1 +Mongrels Ned 666 1 +Simpsons Lisa 710 1 +Simpsons Marge 990 2 +Mongrels Meg 1030 2 +Mongrels Burns 1270 2 +Simpsons Bart 2010 2 query TTII SELECT @@ -3715,14 +3715,14 @@ SELECT FROM score_board s ORDER BY team_name, score; ---- -Mongrels Apu 350 1 -Mongrels Ned 666 2 -Mongrels Meg 1030 3 -Mongrels Burns 1270 4 -Simpsons Homer 1 1 -Simpsons Lisa 710 2 -Simpsons Marge 990 3 -Simpsons Bart 2010 4 +Mongrels Apu 350 1 +Mongrels Ned 666 2 +Mongrels Meg 1030 3 +Mongrels Burns 1270 4 +Simpsons Homer 1 1 +Simpsons Lisa 710 2 +Simpsons Marge 990 3 +Simpsons Bart 2010 4 query TTII SELECT @@ -3733,14 +3733,14 @@ SELECT FROM score_board s ORDER BY team_name, score; ---- -Mongrels Apu 350 1 -Mongrels Ned 666 1 -Mongrels Meg 1030 1 -Mongrels Burns 1270 1 -Simpsons Homer 1 1 -Simpsons Lisa 710 1 -Simpsons Marge 990 1 -Simpsons Bart 2010 1 +Mongrels Apu 350 1 +Mongrels Ned 666 1 +Mongrels Meg 1030 1 +Mongrels Burns 1270 1 +Simpsons Homer 1 1 +Simpsons Lisa 710 1 +Simpsons Marge 990 1 +Simpsons Bart 2010 1 # incorrect number of parameters for ntile query error DataFusion error: Execution error: NTILE requires a positive integer, but finds NULL @@ -4849,10 +4849,10 @@ SELECT nth_value(column2, arrow_cast(2, 'Int32')) OVER (order by column1) FROM t; ---- -3 1 1 4 4 NULL NULL NULL NULL -4 1 1 5 5 3 3 4 4 -5 2 2 6 6 4 4 4 4 -6 2 2 NULL NULL 5 5 4 4 +3 1 1 4 4 NULL NULL NULL NULL +4 1 1 5 5 3 3 4 4 +5 2 2 6 6 4 4 4 4 +6 2 2 NULL NULL 5 5 4 4 # NTILE specifies the argument types so the error is different query error From 87f0838c29ff4b6715dfc204b22216d909186a7e Mon Sep 17 00:00:00 2001 From: Daniel Hegberg Date: Fri, 1 Nov 2024 09:42:19 -0700 Subject: [PATCH 22/32] Use single file write when an extension is present in the path. (#13079) * Use single file write when an extension is present in the path. * Adjust formatting. * Remove unneeded return statement.. 
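As a rough illustration of the rule this patch introduces (a standalone sketch; the helper names below are made up for illustration, while the real logic lives in ListingTableUrl::file_extension() and the demux task in the diff that follows): an output location is treated as a single-file target only when its path does not end in '/' and its last path segment carries an extension; anything else keeps the existing multi-file behaviour with the default extension.

    // Sketch only: a simplified, self-contained version of the decision.
    fn file_extension(path: &str) -> Option<&str> {
        // Look at the last path segment; "a/b/" has an empty last segment.
        let last_segment = path.rsplit('/').next()?;
        if last_segment.contains('.') && !last_segment.ends_with('.') {
            // "file.ext" -> Some("ext")
            last_segment.rsplit('.').next()
        } else {
            // "file." or "file" -> None
            None
        }
    }

    fn is_single_file_target(path: &str) -> bool {
        // Mirrors: !base_output_path.is_collection()
        //          && base_output_path.file_extension().is_some()
        !path.ends_with('/') && file_extension(path).is_some()
    }

    // is_single_file_target("file:///path/to/out.parquet") == true  (one file, name kept as given)
    // is_single_file_target("file:///path/to/out")         == false (default extension is appended)
    // is_single_file_target("file:///path/to/out/")        == false (directory, may split into files)

The new parquet_sink_write_with_* tests and the ListingTableUrl::file_extension() cases below exercise exactly these three situations.
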
--- .../src/datasource/file_format/parquet.rs | 176 ++++++++++++++---- .../src/datasource/file_format/write/demux.rs | 14 +- datafusion/core/src/datasource/listing/url.rs | 63 +++++++ 3 files changed, 206 insertions(+), 47 deletions(-) diff --git a/datafusion/core/src/datasource/file_format/parquet.rs b/datafusion/core/src/datasource/file_format/parquet.rs index 9153e71a5c26..b3f54e0773fd 100644 --- a/datafusion/core/src/datasource/file_format/parquet.rs +++ b/datafusion/core/src/datasource/file_format/parquet.rs @@ -2274,47 +2274,7 @@ mod tests { #[tokio::test] async fn parquet_sink_write() -> Result<()> { - let field_a = Field::new("a", DataType::Utf8, false); - let field_b = Field::new("b", DataType::Utf8, false); - let schema = Arc::new(Schema::new(vec![field_a, field_b])); - let object_store_url = ObjectStoreUrl::local_filesystem(); - - let file_sink_config = FileSinkConfig { - object_store_url: object_store_url.clone(), - file_groups: vec![PartitionedFile::new("/tmp".to_string(), 1)], - table_paths: vec![ListingTableUrl::parse("file:///")?], - output_schema: schema.clone(), - table_partition_cols: vec![], - insert_op: InsertOp::Overwrite, - keep_partition_by_columns: false, - }; - let parquet_sink = Arc::new(ParquetSink::new( - file_sink_config, - TableParquetOptions { - key_value_metadata: std::collections::HashMap::from([ - ("my-data".to_string(), Some("stuff".to_string())), - ("my-data-bool-key".to_string(), None), - ]), - ..Default::default() - }, - )); - - // create data - let col_a: ArrayRef = Arc::new(StringArray::from(vec!["foo", "bar"])); - let col_b: ArrayRef = Arc::new(StringArray::from(vec!["baz", "baz"])); - let batch = RecordBatch::try_from_iter(vec![("a", col_a), ("b", col_b)]).unwrap(); - - // write stream - parquet_sink - .write_all( - Box::pin(RecordBatchStreamAdapter::new( - schema, - futures::stream::iter(vec![Ok(batch)]), - )), - &build_ctx(object_store_url.as_ref()), - ) - .await - .unwrap(); + let parquet_sink = create_written_parquet_sink("file:///").await?; // assert written let mut written = parquet_sink.written(); @@ -2366,6 +2326,140 @@ mod tests { Ok(()) } + #[tokio::test] + async fn parquet_sink_write_with_extension() -> Result<()> { + let filename = "test_file.custom_ext"; + let file_path = format!("file:///path/to/{}", filename); + let parquet_sink = create_written_parquet_sink(file_path.as_str()).await?; + + // assert written + let mut written = parquet_sink.written(); + let written = written.drain(); + assert_eq!( + written.len(), + 1, + "expected a single parquet file to be written, instead found {}", + written.len() + ); + + let (path, ..) = written.take(1).next().unwrap(); + + let path_parts = path.parts().collect::>(); + assert_eq!( + path_parts.len(), + 3, + "Expected 3 path parts, instead found {}", + path_parts.len() + ); + assert_eq!(path_parts.last().unwrap().as_ref(), filename); + + Ok(()) + } + + #[tokio::test] + async fn parquet_sink_write_with_directory_name() -> Result<()> { + let file_path = "file:///path/to"; + let parquet_sink = create_written_parquet_sink(file_path).await?; + + // assert written + let mut written = parquet_sink.written(); + let written = written.drain(); + assert_eq!( + written.len(), + 1, + "expected a single parquet file to be written, instead found {}", + written.len() + ); + + let (path, ..) 
= written.take(1).next().unwrap(); + + let path_parts = path.parts().collect::>(); + assert_eq!( + path_parts.len(), + 3, + "Expected 3 path parts, instead found {}", + path_parts.len() + ); + assert!(path_parts.last().unwrap().as_ref().ends_with(".parquet")); + + Ok(()) + } + + #[tokio::test] + async fn parquet_sink_write_with_folder_ending() -> Result<()> { + let file_path = "file:///path/to/"; + let parquet_sink = create_written_parquet_sink(file_path).await?; + + // assert written + let mut written = parquet_sink.written(); + let written = written.drain(); + assert_eq!( + written.len(), + 1, + "expected a single parquet file to be written, instead found {}", + written.len() + ); + + let (path, ..) = written.take(1).next().unwrap(); + + let path_parts = path.parts().collect::>(); + assert_eq!( + path_parts.len(), + 3, + "Expected 3 path parts, instead found {}", + path_parts.len() + ); + assert!(path_parts.last().unwrap().as_ref().ends_with(".parquet")); + + Ok(()) + } + + async fn create_written_parquet_sink(table_path: &str) -> Result> { + let field_a = Field::new("a", DataType::Utf8, false); + let field_b = Field::new("b", DataType::Utf8, false); + let schema = Arc::new(Schema::new(vec![field_a, field_b])); + let object_store_url = ObjectStoreUrl::local_filesystem(); + + let file_sink_config = FileSinkConfig { + object_store_url: object_store_url.clone(), + file_groups: vec![PartitionedFile::new("/tmp".to_string(), 1)], + table_paths: vec![ListingTableUrl::parse(table_path)?], + output_schema: schema.clone(), + table_partition_cols: vec![], + insert_op: InsertOp::Overwrite, + keep_partition_by_columns: false, + }; + let parquet_sink = Arc::new(ParquetSink::new( + file_sink_config, + TableParquetOptions { + key_value_metadata: std::collections::HashMap::from([ + ("my-data".to_string(), Some("stuff".to_string())), + ("my-data-bool-key".to_string(), None), + ]), + ..Default::default() + }, + )); + + // create data + let col_a: ArrayRef = Arc::new(StringArray::from(vec!["foo", "bar"])); + let col_b: ArrayRef = Arc::new(StringArray::from(vec!["baz", "baz"])); + let batch = RecordBatch::try_from_iter(vec![("a", col_a), ("b", col_b)]).unwrap(); + + // write stream + parquet_sink + .write_all( + Box::pin(RecordBatchStreamAdapter::new( + schema, + futures::stream::iter(vec![Ok(batch)]), + )), + &build_ctx(object_store_url.as_ref()), + ) + .await + .unwrap(); + + Ok(parquet_sink) + } + #[tokio::test] async fn parquet_sink_write_partitions() -> Result<()> { let field_a = Field::new("a", DataType::Utf8, false); diff --git a/datafusion/core/src/datasource/file_format/write/demux.rs b/datafusion/core/src/datasource/file_format/write/demux.rs index 1746ffef8282..b03676d53271 100644 --- a/datafusion/core/src/datasource/file_format/write/demux.rs +++ b/datafusion/core/src/datasource/file_format/write/demux.rs @@ -59,8 +59,9 @@ type DemuxedStreamReceiver = UnboundedReceiver<(Path, RecordBatchReceiver)>; /// which should be contained within the same output file. The outer channel /// is used to send a dynamic number of inner channels, representing a dynamic /// number of total output files. The caller is also responsible to monitor -/// the demux task for errors and abort accordingly. The single_file_output parameter -/// overrides all other settings to force only a single file to be written. +/// the demux task for errors and abort accordingly. A path with an extension will +/// force only a single file to be written with the extension from the path. 
Otherwise +/// the default extension will be used and the output will be split into multiple files. /// partition_by parameter will additionally split the input based on the unique /// values of a specific column ``` /// ┌───────────┐ ┌────────────┐ ┌─────────────┐ @@ -79,12 +80,13 @@ pub(crate) fn start_demuxer_task( context: &Arc, partition_by: Option>, base_output_path: ListingTableUrl, - file_extension: String, + default_extension: String, keep_partition_by_columns: bool, ) -> (SpawnedTask>, DemuxedStreamReceiver) { let (tx, rx) = mpsc::unbounded_channel(); let context = context.clone(); - let single_file_output = !base_output_path.is_collection(); + let single_file_output = + !base_output_path.is_collection() && base_output_path.file_extension().is_some(); let task = match partition_by { Some(parts) => { // There could be an arbitrarily large number of parallel hive style partitions being written to, so we cannot @@ -96,7 +98,7 @@ pub(crate) fn start_demuxer_task( context, parts, base_output_path, - file_extension, + default_extension, keep_partition_by_columns, ) .await @@ -108,7 +110,7 @@ pub(crate) fn start_demuxer_task( input, context, base_output_path, - file_extension, + default_extension, single_file_output, ) .await diff --git a/datafusion/core/src/datasource/listing/url.rs b/datafusion/core/src/datasource/listing/url.rs index 1701707fdb72..e627cacfbfc7 100644 --- a/datafusion/core/src/datasource/listing/url.rs +++ b/datafusion/core/src/datasource/listing/url.rs @@ -190,6 +190,19 @@ impl ListingTableUrl { self.url.path().ends_with(DELIMITER) } + /// Returns the file extension of the last path segment if it exists + pub fn file_extension(&self) -> Option<&str> { + if let Some(segments) = self.url.path_segments() { + if let Some(last_segment) = segments.last() { + if last_segment.contains(".") && !last_segment.ends_with(".") { + return last_segment.split('.').last(); + } + } + } + + None + } + /// Strips the prefix of this [`ListingTableUrl`] from the provided path, returning /// an iterator of the remaining path segments pub(crate) fn strip_prefix<'a, 'b: 'a>( @@ -493,4 +506,54 @@ mod tests { "path not ends with / - fragment ends with / - not collection", ); } + + #[test] + fn test_file_extension() { + fn test(input: &str, expected: Option<&str>, message: &str) { + let url = ListingTableUrl::parse(input).unwrap(); + assert_eq!(url.file_extension(), expected, "{message}"); + } + + test("https://a.b.c/path/", None, "path ends with / - not a file"); + test( + "https://a.b.c/path/?a=b", + None, + "path ends with / - with query args - not a file", + ); + test( + "https://a.b.c/path?a=b/", + None, + "path not ends with / - query ends with / but no file extension", + ); + test( + "https://a.b.c/path/#a=b", + None, + "path ends with / - with fragment - not a file", + ); + test( + "https://a.b.c/path#a=b/", + None, + "path not ends with / - fragment ends with / but no file extension", + ); + test( + "file///some/path/", + None, + "file path ends with / - not a file", + ); + test( + "file///some/path/file", + None, + "file path does not end with - no extension", + ); + test( + "file///some/path/file.", + None, + "file path ends with . 
- no value after .", + ); + test( + "file///some/path/file.ext", + Some("ext"), + "file path ends with .ext - extension is ext", + ); + } } From a34e23783afa99024068219d6070bd45f4d6830b Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Fri, 1 Nov 2024 12:47:25 -0400 Subject: [PATCH 23/32] Minor: make `Expr::volatile` infallible (#13206) * Minor: make `Expr::volatile` infallible * cmt --- datafusion/expr/src/expr.rs | 9 +++++++-- datafusion/optimizer/src/push_down_filter.rs | 2 +- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs index bda4d7ae3d7f..a9c183952fc7 100644 --- a/datafusion/expr/src/expr.rs +++ b/datafusion/expr/src/expr.rs @@ -1574,8 +1574,13 @@ impl Expr { /// Returns true if the expression is volatile, i.e. whether it can return different /// results when evaluated multiple times with the same input. - pub fn is_volatile(&self) -> Result { - self.exists(|expr| Ok(expr.is_volatile_node())) + /// + /// For example the function call `RANDOM()` is volatile as each call will + /// return a different value. + /// + /// See [`Volatility`] for more information. + pub fn is_volatile(&self) -> bool { + self.exists(|expr| Ok(expr.is_volatile_node())).unwrap() } /// Recursively find all [`Expr::Placeholder`] expressions, and diff --git a/datafusion/optimizer/src/push_down_filter.rs b/datafusion/optimizer/src/push_down_filter.rs index 1d0fc207cb51..269ce2910074 100644 --- a/datafusion/optimizer/src/push_down_filter.rs +++ b/datafusion/optimizer/src/push_down_filter.rs @@ -1200,7 +1200,7 @@ fn rewrite_projection( (qualified_name(qualifier, field.name()), expr) }) - .partition(|(_, value)| value.is_volatile().unwrap_or(true)); + .partition(|(_, value)| value.is_volatile()); let mut push_predicates = vec![]; let mut keep_predicates = vec![]; From 592b92440c93deab341eb65c0f31a2b0f100c2df Mon Sep 17 00:00:00 2001 From: Victor Barua Date: Fri, 1 Nov 2024 10:23:29 -0700 Subject: [PATCH 24/32] feat(substrait): handle emit_kind when consuming Substrait plans (#13127) * feat(substrait): handle emit_kind when consuming Substrait plans * cargo fmt * avoid projection flattening for volatile expressions * simplify application of apply_emit_kind --- .../substrait/src/logical_plan/consumer.rs | 200 +++++++++++++++--- .../substrait/tests/cases/emit_kind_tests.rs | 127 +++++++++++ datafusion/substrait/tests/cases/mod.rs | 1 + .../direct_on_project.substrait.json | 90 ++++++++ .../emit_kind/emit_on_filter.substrait.json | 91 ++++++++ 5 files changed, 483 insertions(+), 26 deletions(-) create mode 100644 datafusion/substrait/tests/cases/emit_kind_tests.rs create mode 100644 datafusion/substrait/tests/testdata/test_plans/emit_kind/direct_on_project.substrait.json create mode 100644 datafusion/substrait/tests/testdata/test_plans/emit_kind/emit_on_filter.substrait.json diff --git a/datafusion/substrait/src/logical_plan/consumer.rs b/datafusion/substrait/src/logical_plan/consumer.rs index 289aa7b7f448..a12406bd3439 100644 --- a/datafusion/substrait/src/logical_plan/consumer.rs +++ b/datafusion/substrait/src/logical_plan/consumer.rs @@ -56,6 +56,7 @@ use datafusion::arrow::array::{new_empty_array, AsArray}; use datafusion::arrow::temporal_conversions::NANOSECONDS; use datafusion::common::scalar::ScalarStructBuilder; use datafusion::dataframe::DataFrame; +use datafusion::logical_expr::builder::project; use datafusion::logical_expr::expr::InList; use datafusion::logical_expr::{ col, expr, Cast, Extension, GroupingSet, Like, LogicalPlanBuilder, 
Partitioning, @@ -80,6 +81,7 @@ use substrait::proto::expression::literal::{ use substrait::proto::expression::subquery::SubqueryType; use substrait::proto::expression::{FieldReference, Literal, ScalarFunction}; use substrait::proto::read_rel::local_files::file_or_files::PathType::UriFile; +use substrait::proto::rel_common::{Emit, EmitKind}; use substrait::proto::{ aggregate_function::AggregationInvocation, expression::{ @@ -93,9 +95,9 @@ use substrait::proto::{ join_rel, plan_rel, r#type, read_rel::ReadType, rel::RelType, - set_rel, + rel_common, set_rel, sort_field::{SortDirection, SortKind::*}, - AggregateFunction, Expression, NamedStruct, Plan, Rel, Type, + AggregateFunction, Expression, NamedStruct, Plan, Rel, RelCommon, Type, }; use substrait::proto::{ExtendedExpression, FunctionArgument, SortField}; @@ -562,42 +564,51 @@ pub async fn from_substrait_rel( rel: &Rel, extensions: &Extensions, ) -> Result { - match &rel.rel_type { + let plan: Result = match &rel.rel_type { Some(RelType::Project(p)) => { if let Some(input) = p.input.as_ref() { let mut input = LogicalPlanBuilder::from( from_substrait_rel(ctx, input, extensions).await?, ); - let mut names: HashSet = HashSet::new(); - let mut exprs: Vec = vec![]; - for e in &p.expressions { - let x = - from_substrait_rex(ctx, e, input.clone().schema(), extensions) + let original_schema = input.schema().clone(); + + // Ensure that all expressions have a unique display name, so that + // validate_unique_names does not fail when constructing the project. + let mut name_tracker = NameTracker::new(); + + // By default, a Substrait Project emits all inputs fields followed by all expressions. + // We build the explicit expressions first, and then the input expressions to avoid + // adding aliases to the explicit expressions (as part of ensuring unique names). + // + // This is helpful for plan visualization and tests, because when DataFusion produces + // Substrait Projects it adds an output mapping that excludes all input columns + // leaving only explicit expressions. + + let mut explicit_exprs: Vec = vec![]; + for expr in &p.expressions { + let e = + from_substrait_rex(ctx, expr, input.clone().schema(), extensions) .await?; // if the expression is WindowFunction, wrap in a Window relation - if let Expr::WindowFunction(_) = &x { + if let Expr::WindowFunction(_) = &e { // Adding the same expression here and in the project below // works because the project's builder uses columnize_expr(..) // to transform it into a column reference - input = input.window(vec![x.clone()])? + input = input.window(vec![e.clone()])? 
} - // Ensure the expression has a unique display name, so that project's - // validate_unique_names doesn't fail - let name = x.schema_name().to_string(); - let mut new_name = name.clone(); - let mut i = 0; - while names.contains(&new_name) { - new_name = format!("{}__temp__{}", name, i); - i += 1; - } - if new_name != name { - exprs.push(x.alias(new_name.clone())); - } else { - exprs.push(x); - } - names.insert(new_name); + explicit_exprs.push(name_tracker.get_uniquely_named_expr(e)?); } - input.project(exprs)?.build() + + let mut final_exprs: Vec = vec![]; + for index in 0..original_schema.fields().len() { + let e = Expr::Column(Column::from( + original_schema.qualified_field(index), + )); + final_exprs.push(name_tracker.get_uniquely_named_expr(e)?); + } + final_exprs.append(&mut explicit_exprs); + + input.project(final_exprs)?.build() } else { not_impl_err!("Projection without an input is not supported") } @@ -1074,6 +1085,143 @@ pub async fn from_substrait_rel( })) } _ => not_impl_err!("Unsupported RelType: {:?}", rel.rel_type), + }; + apply_emit_kind(retrieve_rel_common(rel), plan?) +} + +fn retrieve_rel_common(rel: &Rel) -> Option<&RelCommon> { + match rel.rel_type.as_ref() { + None => None, + Some(rt) => match rt { + RelType::Read(r) => r.common.as_ref(), + RelType::Filter(f) => f.common.as_ref(), + RelType::Fetch(f) => f.common.as_ref(), + RelType::Aggregate(a) => a.common.as_ref(), + RelType::Sort(s) => s.common.as_ref(), + RelType::Join(j) => j.common.as_ref(), + RelType::Project(p) => p.common.as_ref(), + RelType::Set(s) => s.common.as_ref(), + RelType::ExtensionSingle(e) => e.common.as_ref(), + RelType::ExtensionMulti(e) => e.common.as_ref(), + RelType::ExtensionLeaf(e) => e.common.as_ref(), + RelType::Cross(c) => c.common.as_ref(), + RelType::Reference(_) => None, + RelType::Write(w) => w.common.as_ref(), + RelType::Ddl(d) => d.common.as_ref(), + RelType::HashJoin(j) => j.common.as_ref(), + RelType::MergeJoin(j) => j.common.as_ref(), + RelType::NestedLoopJoin(j) => j.common.as_ref(), + RelType::Window(w) => w.common.as_ref(), + RelType::Exchange(e) => e.common.as_ref(), + RelType::Expand(e) => e.common.as_ref(), + }, + } +} + +fn retrieve_emit_kind(rel_common: Option<&RelCommon>) -> EmitKind { + // the default EmitKind is Direct if it is not set explicitly + let default = EmitKind::Direct(rel_common::Direct {}); + rel_common + .and_then(|rc| rc.emit_kind.as_ref()) + .map_or(default, |ek| ek.clone()) +} + +fn contains_volatile_expr(proj: &Projection) -> Result { + for expr in proj.expr.iter() { + if expr.is_volatile()? { + return Ok(true); + } + } + Ok(false) +} + +fn apply_emit_kind( + rel_common: Option<&RelCommon>, + plan: LogicalPlan, +) -> Result { + match retrieve_emit_kind(rel_common) { + EmitKind::Direct(_) => Ok(plan), + EmitKind::Emit(Emit { output_mapping }) => { + // It is valid to reference the same field multiple times in the Emit + // In this case, we need to provide unique names to avoid collisions + let mut name_tracker = NameTracker::new(); + match plan { + // To avoid adding a projection on top of a projection, we apply special case + // handling to flatten Substrait Emits. This is only applicable if none of the + // expressions in the projection are volatile. This is to avoid issues like + // converting a single call of the random() function into multiple calls due to + // duplicate fields in the output_mapping. + LogicalPlan::Projection(proj) if !contains_volatile_expr(&proj)? 
=> { + let mut exprs: Vec = vec![]; + for field in output_mapping { + let expr = proj.expr + .get(field as usize) + .ok_or_else(|| substrait_datafusion_err!( + "Emit output field {} cannot be resolved in input schema {}", + field, proj.input.schema().clone() + ))?; + exprs.push(name_tracker.get_uniquely_named_expr(expr.clone())?); + } + + let input = Arc::unwrap_or_clone(proj.input); + project(input, exprs) + } + // Otherwise we just handle the output_mapping as a projection + _ => { + let input_schema = plan.schema(); + + let mut exprs: Vec = vec![]; + for index in output_mapping.into_iter() { + let column = Expr::Column(Column::from( + input_schema.qualified_field(index as usize), + )); + let expr = name_tracker.get_uniquely_named_expr(column)?; + exprs.push(expr); + } + + project(plan, exprs) + } + } + } + } +} + +struct NameTracker { + seen_names: HashSet, +} + +enum NameTrackerStatus { + NeverSeen, + SeenBefore, +} + +impl NameTracker { + fn new() -> Self { + NameTracker { + seen_names: HashSet::default(), + } + } + fn get_unique_name(&mut self, name: String) -> (String, NameTrackerStatus) { + match self.seen_names.insert(name.clone()) { + true => (name, NameTrackerStatus::NeverSeen), + false => { + let mut counter = 0; + loop { + let candidate_name = format!("{}__temp__{}", name, counter); + if self.seen_names.insert(candidate_name.clone()) { + return (candidate_name, NameTrackerStatus::SeenBefore); + } + counter += 1; + } + } + } + } + + fn get_uniquely_named_expr(&mut self, expr: Expr) -> Result { + match self.get_unique_name(expr.name_for_alias()?) { + (_, NameTrackerStatus::NeverSeen) => Ok(expr), + (name, NameTrackerStatus::SeenBefore) => Ok(expr.alias(name)), + } } } diff --git a/datafusion/substrait/tests/cases/emit_kind_tests.rs b/datafusion/substrait/tests/cases/emit_kind_tests.rs new file mode 100644 index 000000000000..ac66177ed796 --- /dev/null +++ b/datafusion/substrait/tests/cases/emit_kind_tests.rs @@ -0,0 +1,127 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! 
Tests for Emit Kind usage + +#[cfg(test)] +mod tests { + use crate::utils::test::{add_plan_schemas_to_ctx, read_json}; + + use datafusion::common::Result; + use datafusion::execution::SessionStateBuilder; + use datafusion::prelude::{CsvReadOptions, SessionConfig, SessionContext}; + use datafusion_substrait::logical_plan::consumer::from_substrait_plan; + use datafusion_substrait::logical_plan::producer::to_substrait_plan; + + #[tokio::test] + async fn project_respects_direct_emit_kind() -> Result<()> { + let proto_plan = read_json( + "tests/testdata/test_plans/emit_kind/direct_on_project.substrait.json", + ); + let ctx = add_plan_schemas_to_ctx(SessionContext::new(), &proto_plan)?; + let plan = from_substrait_plan(&ctx, &proto_plan).await?; + + let plan_str = format!("{}", plan); + + assert_eq!( + plan_str, + "Projection: DATA.A AS a, DATA.B AS b, DATA.A + Int64(1) AS add1\ + \n TableScan: DATA" + ); + Ok(()) + } + + #[tokio::test] + async fn handle_emit_as_project() -> Result<()> { + let proto_plan = read_json( + "tests/testdata/test_plans/emit_kind/emit_on_filter.substrait.json", + ); + let ctx = add_plan_schemas_to_ctx(SessionContext::new(), &proto_plan)?; + let plan = from_substrait_plan(&ctx, &proto_plan).await?; + + let plan_str = format!("{}", plan); + + assert_eq!( + plan_str, + // Note that duplicate references in the remap are aliased + "Projection: DATA.B, DATA.A AS A1, DATA.A AS DATA.A__temp__0 AS A2\ + \n Filter: DATA.B = Int64(2)\ + \n TableScan: DATA" + ); + Ok(()) + } + + async fn make_context() -> Result { + let state = SessionStateBuilder::new() + .with_config(SessionConfig::default()) + .with_default_features() + .build(); + let ctx = SessionContext::new_with_state(state); + ctx.register_csv("data", "tests/testdata/data.csv", CsvReadOptions::default()) + .await?; + Ok(ctx) + } + + #[tokio::test] + async fn handle_emit_as_project_with_volatile_expr() -> Result<()> { + let ctx = make_context().await?; + + let df = ctx + .sql("SELECT random() AS c1, a + 1 AS c2 FROM data") + .await?; + + let plan = df.into_unoptimized_plan(); + assert_eq!( + format!("{}", plan), + "Projection: random() AS c1, data.a + Int64(1) AS c2\ + \n TableScan: data" + ); + + let proto = to_substrait_plan(&plan, &ctx)?; + let plan2 = from_substrait_plan(&ctx, &proto).await?; + // note how the Projections are not flattened + assert_eq!( + format!("{}", plan2), + "Projection: random() AS c1, data.a + Int64(1) AS c2\ + \n Projection: data.a, data.b, data.c, data.d, data.e, data.f, random(), data.a + Int64(1)\ + \n TableScan: data" + ); + Ok(()) + } + + #[tokio::test] + async fn handle_emit_as_project_without_volatile_exprs() -> Result<()> { + let ctx = make_context().await?; + let df = ctx.sql("SELECT a + 1, b + 2 FROM data").await?; + + let plan = df.into_unoptimized_plan(); + assert_eq!( + format!("{}", plan), + "Projection: data.a + Int64(1), data.b + Int64(2)\ + \n TableScan: data" + ); + + let proto = to_substrait_plan(&plan, &ctx)?; + let plan2 = from_substrait_plan(&ctx, &proto).await?; + + let plan1str = format!("{plan}"); + let plan2str = format!("{plan2}"); + assert_eq!(plan1str, plan2str); + + Ok(()) + } +} diff --git a/datafusion/substrait/tests/cases/mod.rs b/datafusion/substrait/tests/cases/mod.rs index 42aa23626106..b1f4b95df66f 100644 --- a/datafusion/substrait/tests/cases/mod.rs +++ b/datafusion/substrait/tests/cases/mod.rs @@ -16,6 +16,7 @@ // under the License. 
mod consumer_integration; +mod emit_kind_tests; mod function_test; mod logical_plans; mod roundtrip_logical_plan; diff --git a/datafusion/substrait/tests/testdata/test_plans/emit_kind/direct_on_project.substrait.json b/datafusion/substrait/tests/testdata/test_plans/emit_kind/direct_on_project.substrait.json new file mode 100644 index 000000000000..63b275e1723f --- /dev/null +++ b/datafusion/substrait/tests/testdata/test_plans/emit_kind/direct_on_project.substrait.json @@ -0,0 +1,90 @@ +{ + "extensionUris": [{ + "extensionUriAnchor": 1, + "uri": "/functions_arithmetic.yaml" + }], + "extensions": [{ + "extensionFunction": { + "extensionUriReference": 1, + "functionAnchor": 0, + "name": "add:i64_i64" + } + }], + "relations": [{ + "root": { + "input": { + "project": { + "common": { + "direct": { + } + }, + "input": { + "read": { + "common": { + "direct": { + } + }, + "baseSchema": { + "names": ["A", "B"], + "struct": { + "types": [{ + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }], + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "namedTable": { + "names": ["DATA"] + } + } + }, + "expressions": [{ + "scalarFunction": { + "functionReference": 0, + "args": [], + "outputType": { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 0 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "literal": { + "i64": 1, + "nullable": false, + "typeVariationReference": 0 + } + } + }], + "options": [] + } + }] + } + }, + "names": ["a", "b", "add1"] + } + }], + "expectedTypeUrls": [] +} \ No newline at end of file diff --git a/datafusion/substrait/tests/testdata/test_plans/emit_kind/emit_on_filter.substrait.json b/datafusion/substrait/tests/testdata/test_plans/emit_kind/emit_on_filter.substrait.json new file mode 100644 index 000000000000..2fc970155955 --- /dev/null +++ b/datafusion/substrait/tests/testdata/test_plans/emit_kind/emit_on_filter.substrait.json @@ -0,0 +1,91 @@ +{ + "extensionUris": [{ + "extensionUriAnchor": 1, + "uri": "/functions_comparison.yaml" + }], + "extensions": [{ + "extensionFunction": { + "extensionUriReference": 1, + "functionAnchor": 0, + "name": "equal:any_any" + } + }], + "relations": [{ + "root": { + "input": { + "filter": { + "common": { + "emit": { + "outputMapping": [1, 0, 0] + } + }, + "input": { + "read": { + "common": { + "direct": { + } + }, + "baseSchema": { + "names": ["A", "B"], + "struct": { + "types": [{ + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }], + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "namedTable": { + "names": ["DATA"] + } + } + }, + "condition": { + "scalarFunction": { + "functionReference": 0, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 1 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "literal": { + "i64": "2", + "nullable": false, + "typeVariationReference": 0 + } + } + }], + "options": [] + } + } + } + }, + "names": ["B", "A1", "A2"] + } + }], + "expectedTypeUrls": [] +} \ No 
newline at end of file From a2e5330d5cad8516c1524bc86fce32cae324b761 Mon Sep 17 00:00:00 2001 From: Martin Hilton Date: Fri, 1 Nov 2024 17:24:33 +0000 Subject: [PATCH 25/32] fix: date_bin() on timstamps before 1970 (#13204) * fix: date_bin() on timstamps before 1970 The date_bin() function was not working correctly for timestamps before 1970. Specifically if the input timestamp was the exact time of the start of a bin then it would be placed in the previous bin. The % operator has a negative result when the dividend is negative. This causes the date_bin calculation to round up to the next bin. To compensate the size of 1 interval is subtracted from the result if the input is negative. This subtraction is no longer performed if the input is already the exact time of the start of a bin. * fix clippy --------- Co-authored-by: Andrew Lamb --- datafusion/functions/src/datetime/date_bin.rs | 30 ++++++++++++++++++- .../sqllogictest/test_files/timestamps.slt | 17 +++++++++++ 2 files changed, 46 insertions(+), 1 deletion(-) diff --git a/datafusion/functions/src/datetime/date_bin.rs b/datafusion/functions/src/datetime/date_bin.rs index e335c4e097f7..e8d065df8633 100644 --- a/datafusion/functions/src/datetime/date_bin.rs +++ b/datafusion/functions/src/datetime/date_bin.rs @@ -240,7 +240,7 @@ fn date_bin_nanos_interval(stride_nanos: i64, source: i64, origin: i64) -> i64 { fn compute_distance(time_diff: i64, stride: i64) -> i64 { let time_delta = time_diff - (time_diff % stride); - if time_diff < 0 && stride > 1 { + if time_diff < 0 && stride > 1 && time_delta != time_diff { // The origin is later than the source timestamp, round down to the previous bin time_delta - stride } else { @@ -864,4 +864,32 @@ mod tests { assert_eq!(result, expected1, "{source} = {expected}"); }) } + + #[test] + fn test_date_bin_before_epoch() { + let cases = [ + ( + (TimeDelta::try_minutes(15), "1969-12-31T23:44:59.999999999"), + "1969-12-31T23:30:00", + ), + ( + (TimeDelta::try_minutes(15), "1969-12-31T23:45:00"), + "1969-12-31T23:45:00", + ), + ( + (TimeDelta::try_minutes(15), "1969-12-31T23:45:00.000000001"), + "1969-12-31T23:45:00", + ), + ]; + + cases.iter().for_each(|((stride, source), expected)| { + let stride = stride.unwrap(); + let stride1 = stride.num_nanoseconds().unwrap(); + let source1 = string_to_timestamp_nanos(source).unwrap(); + + let expected1 = string_to_timestamp_nanos(expected).unwrap(); + let result = date_bin_nanos_interval(stride1, source1, 0); + assert_eq!(result, expected1, "{source} = {expected}"); + }) + } } diff --git a/datafusion/sqllogictest/test_files/timestamps.slt b/datafusion/sqllogictest/test_files/timestamps.slt index 38c2a6647273..a09a63a791fc 100644 --- a/datafusion/sqllogictest/test_files/timestamps.slt +++ b/datafusion/sqllogictest/test_files/timestamps.slt @@ -980,6 +980,23 @@ SELECT DATE_BIN('3 years 1 months', '2022-09-01 00:00:00Z'); ---- 2022-06-01T00:00:00 +# Times before the unix epoch +query P +select date_bin('1 hour', column1) +from (values + (timestamp '1969-01-01 00:00:00'), + (timestamp '1969-01-01 00:15:00'), + (timestamp '1969-01-01 00:30:00'), + (timestamp '1969-01-01 00:45:00'), + (timestamp '1969-01-01 01:00:00') +) as sq +---- +1969-01-01T00:00:00 +1969-01-01T00:00:00 +1969-01-01T00:00:00 +1969-01-01T00:00:00 +1969-01-01T01:00:00 + ### ## test date_trunc function ### From b7f4db4b56e945944c0448f84c61b6f3b86728f8 Mon Sep 17 00:00:00 2001 From: Piotr Findeisen Date: Fri, 1 Nov 2024 18:25:50 +0100 Subject: [PATCH 26/32] Deprecate ScalarUDF::invoke and invoke_no_args 
for invoke_batch (#13179) `invoke_batch` is the one used now. The others are no longer in use and we should deprecate and remove them. --- datafusion/expr/src/udf.rs | 4 +- datafusion/functions-nested/benches/map.rs | 1 + .../functions/benches/character_length.rs | 28 +++++++-- datafusion/functions/benches/concat.rs | 5 +- datafusion/functions/benches/cot.rs | 10 ++- datafusion/functions/benches/date_bin.rs | 1 + datafusion/functions/benches/encoding.rs | 12 +++- datafusion/functions/benches/isnan.rs | 10 ++- datafusion/functions/benches/iszero.rs | 10 ++- datafusion/functions/benches/lower.rs | 46 ++++++++++---- datafusion/functions/benches/ltrim.rs | 7 ++- datafusion/functions/benches/make_date.rs | 4 ++ datafusion/functions/benches/nullif.rs | 5 +- datafusion/functions/benches/pad.rs | 30 +++++++-- datafusion/functions/benches/repeat.rs | 42 +++++++++++-- datafusion/functions/benches/signum.rs | 10 ++- datafusion/functions/benches/strpos.rs | 28 +++++++-- datafusion/functions/benches/substr.rs | 63 ++++++++++++++++--- datafusion/functions/benches/substr_index.rs | 1 + datafusion/functions/benches/to_char.rs | 3 + datafusion/functions/benches/to_timestamp.rs | 6 ++ datafusion/functions/benches/trunc.rs | 10 ++- datafusion/functions/benches/upper.rs | 5 +- datafusion/functions/src/core/version.rs | 2 +- 24 files changed, 284 insertions(+), 59 deletions(-) diff --git a/datafusion/expr/src/udf.rs b/datafusion/expr/src/udf.rs index 83563603f2f3..b5e9a555c2da 100644 --- a/datafusion/expr/src/udf.rs +++ b/datafusion/expr/src/udf.rs @@ -193,6 +193,7 @@ impl ScalarUDF { /// Invoke the function on `args`, returning the appropriate result. /// /// See [`ScalarUDFImpl::invoke`] for more details. + #[deprecated(since = "42.1.0", note = "Use `invoke_batch` instead")] pub fn invoke(&self, args: &[ColumnarValue]) -> Result { self.inner.invoke(args) } @@ -215,13 +216,14 @@ impl ScalarUDF { /// Invoke the function without `args` but number of rows, returning the appropriate result. /// /// See [`ScalarUDFImpl::invoke_no_args`] for more details. 
+ #[deprecated(since = "42.1.0", note = "Use `invoke_batch` instead")] pub fn invoke_no_args(&self, number_rows: usize) -> Result { self.inner.invoke_no_args(number_rows) } /// Returns a `ScalarFunctionImplementation` that can invoke the function /// during execution - #[deprecated(since = "42.0.0", note = "Use `invoke` or `invoke_no_args` instead")] + #[deprecated(since = "42.0.0", note = "Use `invoke_batch` instead")] pub fn fun(&self) -> ScalarFunctionImplementation { let captured = Arc::clone(&self.inner); Arc::new(move |args| captured.invoke(args)) diff --git a/datafusion/functions-nested/benches/map.rs b/datafusion/functions-nested/benches/map.rs index ca23d8b7ff4c..3c4a09c65992 100644 --- a/datafusion/functions-nested/benches/map.rs +++ b/datafusion/functions-nested/benches/map.rs @@ -96,6 +96,7 @@ fn criterion_benchmark(c: &mut Criterion) { b.iter(|| { black_box( + #[allow(deprecated)] // TODO use invoke_batch map_udf() .invoke(&[keys.clone(), values.clone()]) .expect("map should work on valid values"), diff --git a/datafusion/functions/benches/character_length.rs b/datafusion/functions/benches/character_length.rs index 17c4dd1f8912..9ba16807de01 100644 --- a/datafusion/functions/benches/character_length.rs +++ b/datafusion/functions/benches/character_length.rs @@ -84,28 +84,48 @@ fn criterion_benchmark(c: &mut Criterion) { let args_string_ascii = gen_string_array(n_rows, str_len, 0.1, 0.0, false); c.bench_function( &format!("character_length_StringArray_ascii_str_len_{}", str_len), - |b| b.iter(|| black_box(character_length.invoke(&args_string_ascii))), + |b| { + b.iter(|| { + #[allow(deprecated)] // TODO use invoke_batch + black_box(character_length.invoke(&args_string_ascii)) + }) + }, ); // StringArray UTF8 let args_string_utf8 = gen_string_array(n_rows, str_len, 0.1, 0.5, false); c.bench_function( &format!("character_length_StringArray_utf8_str_len_{}", str_len), - |b| b.iter(|| black_box(character_length.invoke(&args_string_utf8))), + |b| { + b.iter(|| { + #[allow(deprecated)] // TODO use invoke_batch + black_box(character_length.invoke(&args_string_utf8)) + }) + }, ); // StringViewArray ASCII only let args_string_view_ascii = gen_string_array(n_rows, str_len, 0.1, 0.0, true); c.bench_function( &format!("character_length_StringViewArray_ascii_str_len_{}", str_len), - |b| b.iter(|| black_box(character_length.invoke(&args_string_view_ascii))), + |b| { + b.iter(|| { + #[allow(deprecated)] // TODO use invoke_batch + black_box(character_length.invoke(&args_string_view_ascii)) + }) + }, ); // StringViewArray UTF8 let args_string_view_utf8 = gen_string_array(n_rows, str_len, 0.1, 0.5, true); c.bench_function( &format!("character_length_StringViewArray_utf8_str_len_{}", str_len), - |b| b.iter(|| black_box(character_length.invoke(&args_string_view_utf8))), + |b| { + b.iter(|| { + #[allow(deprecated)] // TODO use invoke_batch + black_box(character_length.invoke(&args_string_view_utf8)) + }) + }, ); } } diff --git a/datafusion/functions/benches/concat.rs b/datafusion/functions/benches/concat.rs index 91c46ac775a8..280819778f93 100644 --- a/datafusion/functions/benches/concat.rs +++ b/datafusion/functions/benches/concat.rs @@ -38,7 +38,10 @@ fn criterion_benchmark(c: &mut Criterion) { let args = create_args(size, 32); let mut group = c.benchmark_group("concat function"); group.bench_function(BenchmarkId::new("concat", size), |b| { - b.iter(|| criterion::black_box(concat().invoke(&args).unwrap())) + b.iter(|| { + #[allow(deprecated)] // TODO use invoke_batch + 
criterion::black_box(concat().invoke(&args).unwrap()) + }) }); group.finish(); } diff --git a/datafusion/functions/benches/cot.rs b/datafusion/functions/benches/cot.rs index e655d82dec91..a33f00b4b73e 100644 --- a/datafusion/functions/benches/cot.rs +++ b/datafusion/functions/benches/cot.rs @@ -33,12 +33,18 @@ fn criterion_benchmark(c: &mut Criterion) { let f32_array = Arc::new(create_primitive_array::(size, 0.2)); let f32_args = vec![ColumnarValue::Array(f32_array)]; c.bench_function(&format!("cot f32 array: {}", size), |b| { - b.iter(|| black_box(cot_fn.invoke(&f32_args).unwrap())) + b.iter(|| { + #[allow(deprecated)] // TODO use invoke_batch + black_box(cot_fn.invoke(&f32_args).unwrap()) + }) }); let f64_array = Arc::new(create_primitive_array::(size, 0.2)); let f64_args = vec![ColumnarValue::Array(f64_array)]; c.bench_function(&format!("cot f64 array: {}", size), |b| { - b.iter(|| black_box(cot_fn.invoke(&f64_args).unwrap())) + b.iter(|| { + #[allow(deprecated)] // TODO use invoke_batch + black_box(cot_fn.invoke(&f64_args).unwrap()) + }) }); } } diff --git a/datafusion/functions/benches/date_bin.rs b/datafusion/functions/benches/date_bin.rs index c881947354fd..4a8682c42f94 100644 --- a/datafusion/functions/benches/date_bin.rs +++ b/datafusion/functions/benches/date_bin.rs @@ -45,6 +45,7 @@ fn criterion_benchmark(c: &mut Criterion) { let udf = date_bin(); b.iter(|| { + #[allow(deprecated)] // TODO use invoke_batch black_box( udf.invoke(&[interval.clone(), timestamps.clone()]) .expect("date_bin should work on valid values"), diff --git a/datafusion/functions/benches/encoding.rs b/datafusion/functions/benches/encoding.rs index d49235aac938..0615091e90d4 100644 --- a/datafusion/functions/benches/encoding.rs +++ b/datafusion/functions/benches/encoding.rs @@ -29,22 +29,30 @@ fn criterion_benchmark(c: &mut Criterion) { let str_array = Arc::new(create_string_array_with_len::(size, 0.2, 32)); c.bench_function(&format!("base64_decode/{size}"), |b| { let method = ColumnarValue::Scalar("base64".into()); + #[allow(deprecated)] // TODO use invoke_batch let encoded = encoding::encode() .invoke(&[ColumnarValue::Array(str_array.clone()), method.clone()]) .unwrap(); let args = vec![encoded, method]; - b.iter(|| black_box(decode.invoke(&args).unwrap())) + b.iter(|| { + #[allow(deprecated)] // TODO use invoke_batch + black_box(decode.invoke(&args).unwrap()) + }) }); c.bench_function(&format!("hex_decode/{size}"), |b| { let method = ColumnarValue::Scalar("hex".into()); + #[allow(deprecated)] // TODO use invoke_batch let encoded = encoding::encode() .invoke(&[ColumnarValue::Array(str_array.clone()), method.clone()]) .unwrap(); let args = vec![encoded, method]; - b.iter(|| black_box(decode.invoke(&args).unwrap())) + b.iter(|| { + #[allow(deprecated)] // TODO use invoke_batch + black_box(decode.invoke(&args).unwrap()) + }) }); } } diff --git a/datafusion/functions/benches/isnan.rs b/datafusion/functions/benches/isnan.rs index 16bbe073daf0..3e50de658b36 100644 --- a/datafusion/functions/benches/isnan.rs +++ b/datafusion/functions/benches/isnan.rs @@ -32,12 +32,18 @@ fn criterion_benchmark(c: &mut Criterion) { let f32_array = Arc::new(create_primitive_array::(size, 0.2)); let f32_args = vec![ColumnarValue::Array(f32_array)]; c.bench_function(&format!("isnan f32 array: {}", size), |b| { - b.iter(|| black_box(isnan.invoke(&f32_args).unwrap())) + b.iter(|| { + #[allow(deprecated)] // TODO use invoke_batch + black_box(isnan.invoke(&f32_args).unwrap()) + }) }); let f64_array = Arc::new(create_primitive_array::(size, 
0.2)); let f64_args = vec![ColumnarValue::Array(f64_array)]; c.bench_function(&format!("isnan f64 array: {}", size), |b| { - b.iter(|| black_box(isnan.invoke(&f64_args).unwrap())) + b.iter(|| { + #[allow(deprecated)] // TODO use invoke_batch + black_box(isnan.invoke(&f64_args).unwrap()) + }) }); } } diff --git a/datafusion/functions/benches/iszero.rs b/datafusion/functions/benches/iszero.rs index 3348d172e1f2..3e6ac97063ca 100644 --- a/datafusion/functions/benches/iszero.rs +++ b/datafusion/functions/benches/iszero.rs @@ -32,12 +32,18 @@ fn criterion_benchmark(c: &mut Criterion) { let f32_array = Arc::new(create_primitive_array::(size, 0.2)); let f32_args = vec![ColumnarValue::Array(f32_array)]; c.bench_function(&format!("iszero f32 array: {}", size), |b| { - b.iter(|| black_box(iszero.invoke(&f32_args).unwrap())) + b.iter(|| { + #[allow(deprecated)] // TODO use invoke_batch + black_box(iszero.invoke(&f32_args).unwrap()) + }) }); let f64_array = Arc::new(create_primitive_array::(size, 0.2)); let f64_args = vec![ColumnarValue::Array(f64_array)]; c.bench_function(&format!("iszero f64 array: {}", size), |b| { - b.iter(|| black_box(iszero.invoke(&f64_args).unwrap())) + b.iter(|| { + #[allow(deprecated)] // TODO use invoke_batch + black_box(iszero.invoke(&f64_args).unwrap()) + }) }); } } diff --git a/datafusion/functions/benches/lower.rs b/datafusion/functions/benches/lower.rs index 934c1c6bd189..6cc67791464f 100644 --- a/datafusion/functions/benches/lower.rs +++ b/datafusion/functions/benches/lower.rs @@ -124,19 +124,32 @@ fn criterion_benchmark(c: &mut Criterion) { for size in [1024, 4096, 8192] { let args = create_args1(size, 32); c.bench_function(&format!("lower_all_values_are_ascii: {}", size), |b| { - b.iter(|| black_box(lower.invoke(&args))) + b.iter(|| { + #[allow(deprecated)] // TODO use invoke_batch + black_box(lower.invoke(&args)) + }) }); let args = create_args2(size); c.bench_function( &format!("lower_the_first_value_is_nonascii: {}", size), - |b| b.iter(|| black_box(lower.invoke(&args))), + |b| { + b.iter(|| { + #[allow(deprecated)] // TODO use invoke_batch + black_box(lower.invoke(&args)) + }) + }, ); let args = create_args3(size); c.bench_function( &format!("lower_the_middle_value_is_nonascii: {}", size), - |b| b.iter(|| black_box(lower.invoke(&args))), + |b| { + b.iter(|| { + #[allow(deprecated)] // TODO use invoke_batch + black_box(lower.invoke(&args)) + }) + }, ); } @@ -151,24 +164,33 @@ fn criterion_benchmark(c: &mut Criterion) { for &size in &sizes { let args = create_args4(size, str_len, *null_density, mixed); c.bench_function( - &format!("lower_all_values_are_ascii_string_views: size: {}, str_len: {}, null_density: {}, mixed: {}", + &format!("lower_all_values_are_ascii_string_views: size: {}, str_len: {}, null_density: {}, mixed: {}", size, str_len, null_density, mixed), - |b| b.iter(|| black_box(lower.invoke(&args))), - ); + |b| b.iter(|| { + #[allow(deprecated)] // TODO use invoke_batch + black_box(lower.invoke(&args)) + }), + ); let args = create_args4(size, str_len, *null_density, mixed); c.bench_function( - &format!("lower_all_values_are_ascii_string_views: size: {}, str_len: {}, null_density: {}, mixed: {}", + &format!("lower_all_values_are_ascii_string_views: size: {}, str_len: {}, null_density: {}, mixed: {}", size, str_len, null_density, mixed), - |b| b.iter(|| black_box(lower.invoke(&args))), - ); + |b| b.iter(|| { + #[allow(deprecated)] // TODO use invoke_batch + black_box(lower.invoke(&args)) + }), + ); let args = create_args5(size, 0.1, *null_density); 
c.bench_function( - &format!("lower_some_values_are_nonascii_string_views: size: {}, str_len: {}, non_ascii_density: {}, null_density: {}, mixed: {}", + &format!("lower_some_values_are_nonascii_string_views: size: {}, str_len: {}, non_ascii_density: {}, null_density: {}, mixed: {}", size, str_len, 0.1, null_density, mixed), - |b| b.iter(|| black_box(lower.invoke(&args))), - ); + |b| b.iter(|| { + #[allow(deprecated)] // TODO use invoke_batch + black_box(lower.invoke(&args)) + }), + ); } } } diff --git a/datafusion/functions/benches/ltrim.rs b/datafusion/functions/benches/ltrim.rs index b3fa5ef4fdff..4f94729b6fef 100644 --- a/datafusion/functions/benches/ltrim.rs +++ b/datafusion/functions/benches/ltrim.rs @@ -139,7 +139,12 @@ fn run_with_string_type( format!( "{string_type} [size={size}, len_before={len}, len_after={remaining_len}]", ), - |b| b.iter(|| black_box(ltrim.invoke(&args))), + |b| { + b.iter(|| { + #[allow(deprecated)] // TODO use invoke_batch + black_box(ltrim.invoke(&args)) + }) + }, ); } diff --git a/datafusion/functions/benches/make_date.rs b/datafusion/functions/benches/make_date.rs index cb8f1abe6d5d..a9844e4b2541 100644 --- a/datafusion/functions/benches/make_date.rs +++ b/datafusion/functions/benches/make_date.rs @@ -62,6 +62,7 @@ fn criterion_benchmark(c: &mut Criterion) { let days = ColumnarValue::Array(Arc::new(days(&mut rng)) as ArrayRef); b.iter(|| { + #[allow(deprecated)] // TODO use invoke_batch black_box( make_date() .invoke(&[years.clone(), months.clone(), days.clone()]) @@ -77,6 +78,7 @@ fn criterion_benchmark(c: &mut Criterion) { let days = ColumnarValue::Array(Arc::new(days(&mut rng)) as ArrayRef); b.iter(|| { + #[allow(deprecated)] // TODO use invoke_batch black_box( make_date() .invoke(&[year.clone(), months.clone(), days.clone()]) @@ -92,6 +94,7 @@ fn criterion_benchmark(c: &mut Criterion) { let days = ColumnarValue::Array(Arc::new(days(&mut rng)) as ArrayRef); b.iter(|| { + #[allow(deprecated)] // TODO use invoke_batch black_box( make_date() .invoke(&[year.clone(), month.clone(), days.clone()]) @@ -106,6 +109,7 @@ fn criterion_benchmark(c: &mut Criterion) { let day = ColumnarValue::Scalar(ScalarValue::Int32(Some(26))); b.iter(|| { + #[allow(deprecated)] // TODO use invoke_batch black_box( make_date() .invoke(&[year.clone(), month.clone(), day.clone()]) diff --git a/datafusion/functions/benches/nullif.rs b/datafusion/functions/benches/nullif.rs index dfabad335835..6e1154cf182a 100644 --- a/datafusion/functions/benches/nullif.rs +++ b/datafusion/functions/benches/nullif.rs @@ -33,7 +33,10 @@ fn criterion_benchmark(c: &mut Criterion) { ColumnarValue::Array(array), ]; c.bench_function(&format!("nullif scalar array: {}", size), |b| { - b.iter(|| black_box(nullif.invoke(&args).unwrap())) + b.iter(|| { + #[allow(deprecated)] // TODO use invoke_batch + black_box(nullif.invoke(&args).unwrap()) + }) }); } } diff --git a/datafusion/functions/benches/pad.rs b/datafusion/functions/benches/pad.rs index 71fa68762c1e..4b21ca373047 100644 --- a/datafusion/functions/benches/pad.rs +++ b/datafusion/functions/benches/pad.rs @@ -101,17 +101,26 @@ fn criterion_benchmark(c: &mut Criterion) { let args = create_args::(size, 32, false); group.bench_function(BenchmarkId::new("utf8 type", size), |b| { - b.iter(|| criterion::black_box(lpad().invoke(&args).unwrap())) + b.iter(|| { + #[allow(deprecated)] // TODO use invoke_batch + criterion::black_box(lpad().invoke(&args).unwrap()) + }) }); let args = create_args::(size, 32, false); group.bench_function(BenchmarkId::new("largeutf8 
type", size), |b| { - b.iter(|| criterion::black_box(lpad().invoke(&args).unwrap())) + b.iter(|| { + #[allow(deprecated)] // TODO use invoke_batch + criterion::black_box(lpad().invoke(&args).unwrap()) + }) }); let args = create_args::(size, 32, true); group.bench_function(BenchmarkId::new("stringview type", size), |b| { - b.iter(|| criterion::black_box(lpad().invoke(&args).unwrap())) + b.iter(|| { + #[allow(deprecated)] // TODO use invoke_batch + criterion::black_box(lpad().invoke(&args).unwrap()) + }) }); group.finish(); @@ -120,18 +129,27 @@ fn criterion_benchmark(c: &mut Criterion) { let args = create_args::(size, 32, false); group.bench_function(BenchmarkId::new("utf8 type", size), |b| { - b.iter(|| criterion::black_box(rpad().invoke(&args).unwrap())) + b.iter(|| { + #[allow(deprecated)] // TODO use invoke_batch + criterion::black_box(rpad().invoke(&args).unwrap()) + }) }); let args = create_args::(size, 32, false); group.bench_function(BenchmarkId::new("largeutf8 type", size), |b| { - b.iter(|| criterion::black_box(rpad().invoke(&args).unwrap())) + b.iter(|| { + #[allow(deprecated)] // TODO use invoke_batch + criterion::black_box(rpad().invoke(&args).unwrap()) + }) }); // rpad for stringview type let args = create_args::(size, 32, true); group.bench_function(BenchmarkId::new("stringview type", size), |b| { - b.iter(|| criterion::black_box(rpad().invoke(&args).unwrap())) + b.iter(|| { + #[allow(deprecated)] // TODO use invoke_batch + criterion::black_box(rpad().invoke(&args).unwrap()) + }) }); group.finish(); diff --git a/datafusion/functions/benches/repeat.rs b/datafusion/functions/benches/repeat.rs index 5643ccf07133..6e54c92b9b26 100644 --- a/datafusion/functions/benches/repeat.rs +++ b/datafusion/functions/benches/repeat.rs @@ -71,7 +71,12 @@ fn criterion_benchmark(c: &mut Criterion) { "repeat_string_view [size={}, repeat_times={}]", size, repeat_times ), - |b| b.iter(|| black_box(repeat.invoke(&args))), + |b| { + b.iter(|| { + #[allow(deprecated)] // TODO use invoke_batch + black_box(repeat.invoke(&args)) + }) + }, ); let args = create_args::(size, 32, repeat_times, false); @@ -80,7 +85,12 @@ fn criterion_benchmark(c: &mut Criterion) { "repeat_string [size={}, repeat_times={}]", size, repeat_times ), - |b| b.iter(|| black_box(repeat.invoke(&args))), + |b| { + b.iter(|| { + #[allow(deprecated)] // TODO use invoke_batch + black_box(repeat.invoke(&args)) + }) + }, ); let args = create_args::(size, 32, repeat_times, false); @@ -89,7 +99,12 @@ fn criterion_benchmark(c: &mut Criterion) { "repeat_large_string [size={}, repeat_times={}]", size, repeat_times ), - |b| b.iter(|| black_box(repeat.invoke(&args))), + |b| { + b.iter(|| { + #[allow(deprecated)] // TODO use invoke_batch + black_box(repeat.invoke(&args)) + }) + }, ); group.finish(); @@ -107,7 +122,12 @@ fn criterion_benchmark(c: &mut Criterion) { "repeat_string_view [size={}, repeat_times={}]", size, repeat_times ), - |b| b.iter(|| black_box(repeat.invoke(&args))), + |b| { + b.iter(|| { + #[allow(deprecated)] // TODO use invoke_batch + black_box(repeat.invoke(&args)) + }) + }, ); let args = create_args::(size, 32, repeat_times, false); @@ -116,7 +136,12 @@ fn criterion_benchmark(c: &mut Criterion) { "repeat_string [size={}, repeat_times={}]", size, repeat_times ), - |b| b.iter(|| black_box(repeat.invoke(&args))), + |b| { + b.iter(|| { + #[allow(deprecated)] // TODO use invoke_batch + black_box(repeat.invoke(&args)) + }) + }, ); let args = create_args::(size, 32, repeat_times, false); @@ -125,7 +150,12 @@ fn criterion_benchmark(c: 
&mut Criterion) { "repeat_large_string [size={}, repeat_times={}]", size, repeat_times ), - |b| b.iter(|| black_box(repeat.invoke(&args))), + |b| { + b.iter(|| { + #[allow(deprecated)] // TODO use invoke_batch + black_box(repeat.invoke(&args)) + }) + }, ); group.finish(); diff --git a/datafusion/functions/benches/signum.rs b/datafusion/functions/benches/signum.rs index 9f8d8258c823..ea1f5433df4e 100644 --- a/datafusion/functions/benches/signum.rs +++ b/datafusion/functions/benches/signum.rs @@ -32,12 +32,18 @@ fn criterion_benchmark(c: &mut Criterion) { let f32_array = Arc::new(create_primitive_array::(size, 0.2)); let f32_args = vec![ColumnarValue::Array(f32_array)]; c.bench_function(&format!("signum f32 array: {}", size), |b| { - b.iter(|| black_box(signum.invoke(&f32_args).unwrap())) + b.iter(|| { + #[allow(deprecated)] // TODO use invoke_batch + black_box(signum.invoke(&f32_args).unwrap()) + }) }); let f64_array = Arc::new(create_primitive_array::(size, 0.2)); let f64_args = vec![ColumnarValue::Array(f64_array)]; c.bench_function(&format!("signum f64 array: {}", size), |b| { - b.iter(|| black_box(signum.invoke(&f64_args).unwrap())) + b.iter(|| { + #[allow(deprecated)] // TODO use invoke_batch + black_box(signum.invoke(&f64_args).unwrap()) + }) }); } } diff --git a/datafusion/functions/benches/strpos.rs b/datafusion/functions/benches/strpos.rs index c78e69826836..31ca61e34c3a 100644 --- a/datafusion/functions/benches/strpos.rs +++ b/datafusion/functions/benches/strpos.rs @@ -112,28 +112,48 @@ fn criterion_benchmark(c: &mut Criterion) { let args_string_ascii = gen_string_array(n_rows, str_len, 0.1, 0.0, false); c.bench_function( &format!("strpos_StringArray_ascii_str_len_{}", str_len), - |b| b.iter(|| black_box(strpos.invoke(&args_string_ascii))), + |b| { + b.iter(|| { + #[allow(deprecated)] // TODO use invoke_batch + black_box(strpos.invoke(&args_string_ascii)) + }) + }, ); // StringArray UTF8 let args_string_utf8 = gen_string_array(n_rows, str_len, 0.1, 0.5, false); c.bench_function( &format!("strpos_StringArray_utf8_str_len_{}", str_len), - |b| b.iter(|| black_box(strpos.invoke(&args_string_utf8))), + |b| { + b.iter(|| { + #[allow(deprecated)] // TODO use invoke_batch + black_box(strpos.invoke(&args_string_utf8)) + }) + }, ); // StringViewArray ASCII only let args_string_view_ascii = gen_string_array(n_rows, str_len, 0.1, 0.0, true); c.bench_function( &format!("strpos_StringViewArray_ascii_str_len_{}", str_len), - |b| b.iter(|| black_box(strpos.invoke(&args_string_view_ascii))), + |b| { + b.iter(|| { + #[allow(deprecated)] // TODO use invoke_batch + black_box(strpos.invoke(&args_string_view_ascii)) + }) + }, ); // StringViewArray UTF8 let args_string_view_utf8 = gen_string_array(n_rows, str_len, 0.1, 0.5, true); c.bench_function( &format!("strpos_StringViewArray_utf8_str_len_{}", str_len), - |b| b.iter(|| black_box(strpos.invoke(&args_string_view_utf8))), + |b| { + b.iter(|| { + #[allow(deprecated)] // TODO use invoke_batch + black_box(strpos.invoke(&args_string_view_utf8)) + }) + }, ); } } diff --git a/datafusion/functions/benches/substr.rs b/datafusion/functions/benches/substr.rs index 90ba75c1e8a5..21020dad31a4 100644 --- a/datafusion/functions/benches/substr.rs +++ b/datafusion/functions/benches/substr.rs @@ -107,19 +107,34 @@ fn criterion_benchmark(c: &mut Criterion) { let args = create_args_without_count::(size, len, true, true); group.bench_function( format!("substr_string_view [size={}, strlen={}]", size, len), - |b| b.iter(|| black_box(substr.invoke(&args))), + |b| { + 
b.iter(|| { + #[allow(deprecated)] // TODO use invoke_batch + black_box(substr.invoke(&args)) + }) + }, ); let args = create_args_without_count::(size, len, false, false); group.bench_function( format!("substr_string [size={}, strlen={}]", size, len), - |b| b.iter(|| black_box(substr.invoke(&args))), + |b| { + b.iter(|| { + #[allow(deprecated)] // TODO use invoke_batch + black_box(substr.invoke(&args)) + }) + }, ); let args = create_args_without_count::(size, len, true, false); group.bench_function( format!("substr_large_string [size={}, strlen={}]", size, len), - |b| b.iter(|| black_box(substr.invoke(&args))), + |b| { + b.iter(|| { + #[allow(deprecated)] // TODO use invoke_batch + black_box(substr.invoke(&args)) + }) + }, ); group.finish(); @@ -137,7 +152,12 @@ fn criterion_benchmark(c: &mut Criterion) { "substr_string_view [size={}, count={}, strlen={}]", size, count, len, ), - |b| b.iter(|| black_box(substr.invoke(&args))), + |b| { + b.iter(|| { + #[allow(deprecated)] // TODO use invoke_batch + black_box(substr.invoke(&args)) + }) + }, ); let args = create_args_with_count::(size, len, count, false); @@ -146,7 +166,12 @@ fn criterion_benchmark(c: &mut Criterion) { "substr_string [size={}, count={}, strlen={}]", size, count, len, ), - |b| b.iter(|| black_box(substr.invoke(&args))), + |b| { + b.iter(|| { + #[allow(deprecated)] // TODO use invoke_batch + black_box(substr.invoke(&args)) + }) + }, ); let args = create_args_with_count::(size, len, count, false); @@ -155,7 +180,12 @@ fn criterion_benchmark(c: &mut Criterion) { "substr_large_string [size={}, count={}, strlen={}]", size, count, len, ), - |b| b.iter(|| black_box(substr.invoke(&args))), + |b| { + b.iter(|| { + #[allow(deprecated)] // TODO use invoke_batch + black_box(substr.invoke(&args)) + }) + }, ); group.finish(); @@ -173,7 +203,12 @@ fn criterion_benchmark(c: &mut Criterion) { "substr_string_view [size={}, count={}, strlen={}]", size, count, len, ), - |b| b.iter(|| black_box(substr.invoke(&args))), + |b| { + b.iter(|| { + #[allow(deprecated)] // TODO use invoke_batch + black_box(substr.invoke(&args)) + }) + }, ); let args = create_args_with_count::(size, len, count, false); @@ -182,7 +217,12 @@ fn criterion_benchmark(c: &mut Criterion) { "substr_string [size={}, count={}, strlen={}]", size, count, len, ), - |b| b.iter(|| black_box(substr.invoke(&args))), + |b| { + b.iter(|| { + #[allow(deprecated)] // TODO use invoke_batch + black_box(substr.invoke(&args)) + }) + }, ); let args = create_args_with_count::(size, len, count, false); @@ -191,7 +231,12 @@ fn criterion_benchmark(c: &mut Criterion) { "substr_large_string [size={}, count={}, strlen={}]", size, count, len, ), - |b| b.iter(|| black_box(substr.invoke(&args))), + |b| { + b.iter(|| { + #[allow(deprecated)] // TODO use invoke_batch + black_box(substr.invoke(&args)) + }) + }, ); group.finish(); diff --git a/datafusion/functions/benches/substr_index.rs b/datafusion/functions/benches/substr_index.rs index bb9a5b809eee..1e793cf4db8c 100644 --- a/datafusion/functions/benches/substr_index.rs +++ b/datafusion/functions/benches/substr_index.rs @@ -90,6 +90,7 @@ fn criterion_benchmark(c: &mut Criterion) { let args = [strings, delimiters, counts]; b.iter(|| { + #[allow(deprecated)] // TODO use invoke_batch black_box( substr_index() .invoke(&args) diff --git a/datafusion/functions/benches/to_char.rs b/datafusion/functions/benches/to_char.rs index d9a153e64abc..09032fdf2de1 100644 --- a/datafusion/functions/benches/to_char.rs +++ b/datafusion/functions/benches/to_char.rs @@ -86,6 +86,7 
@@ fn criterion_benchmark(c: &mut Criterion) { let patterns = ColumnarValue::Array(Arc::new(patterns(&mut rng)) as ArrayRef); b.iter(|| { + #[allow(deprecated)] // TODO use invoke_batch black_box( to_char() .invoke(&[data.clone(), patterns.clone()]) @@ -101,6 +102,7 @@ fn criterion_benchmark(c: &mut Criterion) { ColumnarValue::Scalar(ScalarValue::Utf8(Some("%Y-%m-%d".to_string()))); b.iter(|| { + #[allow(deprecated)] // TODO use invoke_batch black_box( to_char() .invoke(&[data.clone(), patterns.clone()]) @@ -124,6 +126,7 @@ fn criterion_benchmark(c: &mut Criterion) { ))); b.iter(|| { + #[allow(deprecated)] // TODO use invoke_batch black_box( to_char() .invoke(&[data.clone(), pattern.clone()]) diff --git a/datafusion/functions/benches/to_timestamp.rs b/datafusion/functions/benches/to_timestamp.rs index 5a87b34caf47..11816fe9c64f 100644 --- a/datafusion/functions/benches/to_timestamp.rs +++ b/datafusion/functions/benches/to_timestamp.rs @@ -113,6 +113,7 @@ fn criterion_benchmark(c: &mut Criterion) { let string_array = ColumnarValue::Array(Arc::new(data()) as ArrayRef); b.iter(|| { + #[allow(deprecated)] // TODO use invoke_batch black_box( to_timestamp() .invoke(&[string_array.clone()]) @@ -126,6 +127,7 @@ fn criterion_benchmark(c: &mut Criterion) { let string_array = ColumnarValue::Array(Arc::new(data) as ArrayRef); b.iter(|| { + #[allow(deprecated)] // TODO use invoke_batch black_box( to_timestamp() .invoke(&[string_array.clone()]) @@ -139,6 +141,7 @@ fn criterion_benchmark(c: &mut Criterion) { let string_array = ColumnarValue::Array(Arc::new(data) as ArrayRef); b.iter(|| { + #[allow(deprecated)] // TODO use invoke_batch black_box( to_timestamp() .invoke(&[string_array.clone()]) @@ -157,6 +160,7 @@ fn criterion_benchmark(c: &mut Criterion) { ColumnarValue::Array(Arc::new(format3) as ArrayRef), ]; b.iter(|| { + #[allow(deprecated)] // TODO use invoke_batch black_box( to_timestamp() .invoke(&args.clone()) @@ -183,6 +187,7 @@ fn criterion_benchmark(c: &mut Criterion) { ), ]; b.iter(|| { + #[allow(deprecated)] // TODO use invoke_batch black_box( to_timestamp() .invoke(&args.clone()) @@ -209,6 +214,7 @@ fn criterion_benchmark(c: &mut Criterion) { ), ]; b.iter(|| { + #[allow(deprecated)] // TODO use invoke_batch black_box( to_timestamp() .invoke(&args.clone()) diff --git a/datafusion/functions/benches/trunc.rs b/datafusion/functions/benches/trunc.rs index 92a08abf3d32..07ce522eb913 100644 --- a/datafusion/functions/benches/trunc.rs +++ b/datafusion/functions/benches/trunc.rs @@ -33,12 +33,18 @@ fn criterion_benchmark(c: &mut Criterion) { let f32_array = Arc::new(create_primitive_array::(size, 0.2)); let f32_args = vec![ColumnarValue::Array(f32_array)]; c.bench_function(&format!("trunc f32 array: {}", size), |b| { - b.iter(|| black_box(trunc.invoke(&f32_args).unwrap())) + b.iter(|| { + #[allow(deprecated)] // TODO use invoke_batch + black_box(trunc.invoke(&f32_args).unwrap()) + }) }); let f64_array = Arc::new(create_primitive_array::(size, 0.2)); let f64_args = vec![ColumnarValue::Array(f64_array)]; c.bench_function(&format!("trunc f64 array: {}", size), |b| { - b.iter(|| black_box(trunc.invoke(&f64_args).unwrap())) + b.iter(|| { + #[allow(deprecated)] // TODO use invoke_batch + black_box(trunc.invoke(&f64_args).unwrap()) + }) }); } } diff --git a/datafusion/functions/benches/upper.rs b/datafusion/functions/benches/upper.rs index a3e5fbd7a433..ac4ecacff941 100644 --- a/datafusion/functions/benches/upper.rs +++ b/datafusion/functions/benches/upper.rs @@ -37,7 +37,10 @@ fn criterion_benchmark(c: 
&mut Criterion) { for size in [1024, 4096, 8192] { let args = create_args(size, 32); c.bench_function("upper_all_values_are_ascii", |b| { - b.iter(|| black_box(upper.invoke(&args))) + b.iter(|| { + #[allow(deprecated)] // TODO use invoke_batch + black_box(upper.invoke(&args)) + }) }); } } diff --git a/datafusion/functions/src/core/version.rs b/datafusion/functions/src/core/version.rs index f726122c649a..e7ac749ddddc 100644 --- a/datafusion/functions/src/core/version.rs +++ b/datafusion/functions/src/core/version.rs @@ -118,7 +118,7 @@ mod test { #[tokio::test] async fn test_version_udf() { let version_udf = ScalarUDF::from(VersionFunc::new()); - let version = version_udf.invoke_no_args(0).unwrap(); + let version = version_udf.invoke_batch(&[], 1).unwrap(); if let ColumnarValue::Scalar(ScalarValue::Utf8(Some(version))) = version { assert!(version.starts_with("Apache DataFusion")); From 6b76a35cadb17b33a5140f6f67e0491aabaa409e Mon Sep 17 00:00:00 2001 From: Lordworms <48054792+Lordworms@users.noreply.github.com> Date: Fri, 1 Nov 2024 11:12:22 -0700 Subject: [PATCH 27/32] consider volatile function in simply_expression (#13128) * consider volatile function in simply_expression * refactor and fix bugs * fix clippy * refactor * refactor * format * fix clippy * Resolve logical conflict * simplify more --------- Co-authored-by: Andrew Lamb --- .../simplify_expressions/expr_simplifier.rs | 73 ++++++++++++++++++- .../src/simplify_expressions/utils.rs | 13 +++- 2 files changed, 78 insertions(+), 8 deletions(-) diff --git a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs index ce6734616b80..40be1f85391d 100644 --- a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs +++ b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs @@ -862,8 +862,8 @@ impl<'a, S: SimplifyInfo> TreeNodeRewriter for Simplifier<'a, S> { right, }) if has_common_conjunction(&left, &right) => { let lhs: IndexSet = iter_conjunction_owned(*left).collect(); - let (common, rhs): (Vec<_>, Vec<_>) = - iter_conjunction_owned(*right).partition(|e| lhs.contains(e)); + let (common, rhs): (Vec<_>, Vec<_>) = iter_conjunction_owned(*right) + .partition(|e| lhs.contains(e) && !e.is_volatile()); let new_rhs = rhs.into_iter().reduce(and); let new_lhs = lhs.into_iter().filter(|e| !common.contains(e)).reduce(and); @@ -1682,8 +1682,8 @@ impl<'a, S: SimplifyInfo> TreeNodeRewriter for Simplifier<'a, S> { } fn has_common_conjunction(lhs: &Expr, rhs: &Expr) -> bool { - let lhs: HashSet<&Expr> = iter_conjunction(lhs).collect(); - iter_conjunction(rhs).any(|e| lhs.contains(&e)) + let lhs_set: HashSet<&Expr> = iter_conjunction(lhs).collect(); + iter_conjunction(rhs).any(|e| lhs_set.contains(&e) && !e.is_volatile()) } // TODO: We might not need this after defer pattern for Box is stabilized. 
https://github.com/rust-lang/rust/issues/87121 @@ -3978,4 +3978,69 @@ mod tests { unimplemented!("not needed for tests") } } + #[derive(Debug)] + struct VolatileUdf { + signature: Signature, + } + + impl VolatileUdf { + pub fn new() -> Self { + Self { + signature: Signature::exact(vec![], Volatility::Volatile), + } + } + } + impl ScalarUDFImpl for VolatileUdf { + fn as_any(&self) -> &dyn std::any::Any { + self + } + + fn name(&self) -> &str { + "VolatileUdf" + } + + fn signature(&self) -> &Signature { + &self.signature + } + + fn return_type(&self, _arg_types: &[DataType]) -> Result { + Ok(DataType::Int16) + } + } + #[test] + fn test_optimize_volatile_conditions() { + let fun = Arc::new(ScalarUDF::new_from_impl(VolatileUdf::new())); + let rand = Expr::ScalarFunction(ScalarFunction::new_udf(fun, vec![])); + { + let expr = rand + .clone() + .eq(lit(0)) + .or(col("column1").eq(lit(2)).and(rand.clone().eq(lit(0)))); + + assert_eq!(simplify(expr.clone()), expr); + } + + { + let expr = col("column1") + .eq(lit(2)) + .or(col("column1").eq(lit(2)).and(rand.clone().eq(lit(0)))); + + assert_eq!(simplify(expr), col("column1").eq(lit(2))); + } + + { + let expr = (col("column1").eq(lit(2)).and(rand.clone().eq(lit(0)))).or(col( + "column1", + ) + .eq(lit(2)) + .and(rand.clone().eq(lit(0)))); + + assert_eq!( + simplify(expr), + col("column1") + .eq(lit(2)) + .and((rand.clone().eq(lit(0))).or(rand.clone().eq(lit(0)))) + ); + } + } } diff --git a/datafusion/optimizer/src/simplify_expressions/utils.rs b/datafusion/optimizer/src/simplify_expressions/utils.rs index 38bfc1a93403..c30c3631c193 100644 --- a/datafusion/optimizer/src/simplify_expressions/utils.rs +++ b/datafusion/optimizer/src/simplify_expressions/utils.rs @@ -67,16 +67,21 @@ pub static POWS_OF_TEN: [i128; 38] = [ /// returns true if `needle` is found in a chain of search_op /// expressions. Such as: (A AND B) AND C -pub fn expr_contains(expr: &Expr, needle: &Expr, search_op: Operator) -> bool { +fn expr_contains_inner(expr: &Expr, needle: &Expr, search_op: Operator) -> bool { match expr { Expr::BinaryExpr(BinaryExpr { left, op, right }) if *op == search_op => { - expr_contains(left, needle, search_op) - || expr_contains(right, needle, search_op) + expr_contains_inner(left, needle, search_op) + || expr_contains_inner(right, needle, search_op) } _ => expr == needle, } } +/// check volatile calls and return if expr contains needle +pub fn expr_contains(expr: &Expr, needle: &Expr, search_op: Operator) -> bool { + expr_contains_inner(expr, needle, search_op) && !needle.is_volatile() +} + /// Deletes all 'needles' or remains one 'needle' that are found in a chain of xor /// expressions. 
Such as: A ^ (A ^ (B ^ A)) pub fn delete_xor_in_complex_expr(expr: &Expr, needle: &Expr, is_left: bool) -> Expr { @@ -206,7 +211,7 @@ pub fn is_false(expr: &Expr) -> bool { /// returns true if `haystack` looks like (needle OP X) or (X OP needle) pub fn is_op_with(target_op: Operator, haystack: &Expr, needle: &Expr) -> bool { - matches!(haystack, Expr::BinaryExpr(BinaryExpr { left, op, right }) if op == &target_op && (needle == left.as_ref() || needle == right.as_ref())) + matches!(haystack, Expr::BinaryExpr(BinaryExpr { left, op, right }) if op == &target_op && (needle == left.as_ref() || needle == right.as_ref()) && !needle.is_volatile()) } /// returns true if `not_expr` is !`expr` (not) From 9ff08005382d5d71fa5e9b176ed01c087e649c4b Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Fri, 1 Nov 2024 15:05:33 -0400 Subject: [PATCH 28/32] Minor: fix merge conflict (#13219) --- datafusion/substrait/src/logical_plan/consumer.rs | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/datafusion/substrait/src/logical_plan/consumer.rs b/datafusion/substrait/src/logical_plan/consumer.rs index a12406bd3439..7ccca8616ba0 100644 --- a/datafusion/substrait/src/logical_plan/consumer.rs +++ b/datafusion/substrait/src/logical_plan/consumer.rs @@ -1126,13 +1126,8 @@ fn retrieve_emit_kind(rel_common: Option<&RelCommon>) -> EmitKind { .map_or(default, |ek| ek.clone()) } -fn contains_volatile_expr(proj: &Projection) -> Result { - for expr in proj.expr.iter() { - if expr.is_volatile()? { - return Ok(true); - } - } - Ok(false) +fn contains_volatile_expr(proj: &Projection) -> bool { + proj.expr.iter().any(|e| e.is_volatile()) } fn apply_emit_kind( @@ -1151,7 +1146,7 @@ fn apply_emit_kind( // expressions in the projection are volatile. This is to avoid issues like // converting a single call of the random() function into multiple calls due to // duplicate fields in the output_mapping. - LogicalPlan::Projection(proj) if !contains_volatile_expr(&proj)? => { + LogicalPlan::Projection(proj) if !contains_volatile_expr(&proj) => { let mut exprs: Vec = vec![]; for field in output_mapping { let expr = proj.expr From 752561a9fc821fce6a657fd1b959996900c44c5f Mon Sep 17 00:00:00 2001 From: ngli-me <107162634+ngli-me@users.noreply.github.com> Date: Fri, 1 Nov 2024 14:39:04 -0500 Subject: [PATCH 29/32] Convert LexOrdering `type` to `struct`. (#13146) * Conversion types for LexOrdering and LexOrderingRef to structs. * Format and fix type errors. Adjusted expected output when using `LexOrdering`. * Updated usage of `FromIterator` and removed `empty()` in favor of `default()`. * Adjusted chained `map` and `flatten` calls to `flat_map`, and swapped `unwrap_or` to `unwrap_or_default`. * Adjusted slt files to include a space after commas, when relating to LexOrdering and LexOrderingRef. * Removed unnecessary path prefixes in `sort_expr`. * Fixed tpch slt files. * Removed LexOrderingRef struct. * Removed dereferences to `LexOrderingRef` left over from the struct removal. * Removed remaining usage of the raw `LexOrderingRef` type. * Formatting. * Apply suggestions from code review, along with formatting. * Merged with main. * Merged with main. 
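For reference, a minimal sketch (not part of the patch itself) of how call sites read after this conversion; it uses only constructors that appear in the diff below (`LexOrdering::new`, `LexOrdering::default`), and the schema/column are placeholders:

use std::sync::Arc;
use arrow_schema::{DataType, Field, Schema, SortOptions};
use datafusion_common::Result;
use datafusion_physical_expr::{expressions::col, LexOrdering, PhysicalSortExpr};

fn example_ordering() -> Result<LexOrdering> {
    let schema = Arc::new(Schema::new(vec![Field::new("a", DataType::Int32, false)]));
    // Previously a LexOrdering was a plain Vec<PhysicalSortExpr>; it is now a
    // struct, so call sites construct it explicitly instead of building a Vec.
    let ordering = LexOrdering::new(vec![PhysicalSortExpr {
        expr: col("a", &schema)?,
        options: SortOptions::default(),
    }]);
    // The empty ordering is LexOrdering::default() rather than vec![].
    let _empty = LexOrdering::default();
    Ok(ordering)
}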
--------- Co-authored-by: nglime --- benchmarks/src/sort.rs | 6 +- datafusion/core/benches/physical_plan.rs | 3 +- datafusion/core/benches/sort.rs | 3 +- .../core/src/datasource/listing/table.rs | 37 +-- datafusion/core/src/datasource/mod.rs | 2 +- .../physical_plan/file_scan_config.rs | 5 +- .../core/src/datasource/physical_plan/mod.rs | 7 +- .../datasource/physical_plan/statistics.rs | 19 +- .../enforce_distribution.rs | 181 +++++++------- .../src/physical_optimizer/enforce_sorting.rs | 223 +++++++++--------- .../src/physical_optimizer/join_selection.rs | 8 +- .../physical_optimizer/projection_pushdown.rs | 50 ++-- .../replace_with_order_preserving_variants.rs | 6 +- .../src/physical_optimizer/sort_pushdown.rs | 25 +- .../core/src/physical_optimizer/test_utils.rs | 17 +- .../physical_optimizer/update_aggr_exprs.rs | 6 +- .../core/src/physical_optimizer/utils.rs | 2 +- datafusion/core/src/physical_planner.rs | 12 +- .../core/tests/fuzz_cases/aggregate_fuzz.rs | 10 +- .../aggregation_fuzzer/data_generator.rs | 5 +- .../tests/fuzz_cases/equivalence/ordering.rs | 14 +- .../fuzz_cases/equivalence/projection.rs | 6 +- .../fuzz_cases/equivalence/properties.rs | 4 +- .../tests/fuzz_cases/equivalence/utils.rs | 12 +- .../core/tests/fuzz_cases/merge_fuzz.rs | 5 +- datafusion/core/tests/fuzz_cases/sort_fuzz.rs | 5 +- .../sort_preserving_repartition_fuzz.rs | 3 +- .../core/tests/fuzz_cases/window_fuzz.rs | 21 +- datafusion/core/tests/memory_limit/mod.rs | 40 ++-- .../limited_distinct_aggregation.rs | 5 +- .../tests/physical_optimizer/test_util.rs | 4 +- .../src/accumulator.rs | 7 +- .../functions-aggregate-common/src/utils.rs | 6 +- .../functions-aggregate/benches/count.rs | 3 +- datafusion/functions-aggregate/benches/sum.rs | 3 +- .../functions-aggregate/src/array_agg.rs | 4 +- .../functions-aggregate/src/first_last.rs | 92 +++++--- .../functions-aggregate/src/nth_value.rs | 4 +- datafusion/functions-aggregate/src/stddev.rs | 5 +- .../physical-expr-common/src/sort_expr.rs | 183 +++++++++++--- datafusion/physical-expr-common/src/utils.rs | 4 +- datafusion/physical-expr/src/aggregate.rs | 27 ++- .../physical-expr/src/equivalence/mod.rs | 25 +- .../physical-expr/src/equivalence/ordering.rs | 40 ++-- .../src/equivalence/properties.rs | 130 +++++----- datafusion/physical-expr/src/utils/mod.rs | 6 +- .../physical-expr/src/window/aggregate.rs | 24 +- .../physical-expr/src/window/built_in.rs | 17 +- .../src/window/sliding_aggregate.rs | 22 +- .../physical-expr/src/window/window_expr.rs | 4 +- .../src/topk_aggregation.rs | 3 +- datafusion/physical-plan/benches/spm.rs | 5 +- .../physical-plan/src/aggregates/mod.rs | 26 +- .../physical-plan/src/aggregates/order/mod.rs | 4 +- .../src/aggregates/order/partial.rs | 4 +- .../physical-plan/src/aggregates/row_hash.rs | 16 +- datafusion/physical-plan/src/display.rs | 23 +- .../physical-plan/src/execution_plan.rs | 12 +- .../src/joins/nested_loop_join.rs | 4 +- .../src/joins/sort_merge_join.rs | 14 +- .../src/joins/stream_join_utils.rs | 5 +- .../src/joins/symmetric_hash_join.rs | 112 ++++----- .../physical-plan/src/joins/test_utils.rs | 6 +- datafusion/physical-plan/src/joins/utils.rs | 40 ++-- datafusion/physical-plan/src/memory.rs | 23 +- .../physical-plan/src/repartition/mod.rs | 21 +- .../physical-plan/src/sorts/partial_sort.rs | 52 ++-- datafusion/physical-plan/src/sorts/sort.rs | 71 +++--- .../src/sorts/sort_preserving_merge.rs | 92 ++++---- datafusion/physical-plan/src/sorts/stream.rs | 3 +- .../src/sorts/streaming_merge.rs | 7 +- 
datafusion/physical-plan/src/topk/mod.rs | 8 +- datafusion/physical-plan/src/union.rs | 5 +- .../src/windows/bounded_window_agg_exec.rs | 33 +-- datafusion/physical-plan/src/windows/mod.rs | 27 ++- .../src/windows/window_agg_exec.rs | 13 +- .../proto/src/physical_plan/from_proto.rs | 8 +- datafusion/proto/src/physical_plan/mod.rs | 19 +- .../proto/src/physical_plan/to_proto.rs | 7 +- .../tests/cases/roundtrip_physical_plan.rs | 31 +-- .../sqllogictest/test_files/aggregate.slt | 2 +- datafusion/sqllogictest/test_files/cte.slt | 2 +- .../test_files/filter_without_sort_exec.slt | 6 +- .../sqllogictest/test_files/group_by.slt | 16 +- datafusion/sqllogictest/test_files/insert.slt | 6 +- .../test_files/insert_to_external.slt | 6 +- .../join_disable_repartition_joins.slt | 2 +- datafusion/sqllogictest/test_files/joins.slt | 12 +- .../test_files/monotonic_projection_test.slt | 10 +- datafusion/sqllogictest/test_files/order.slt | 16 +- .../sqllogictest/test_files/parquet.slt | 10 +- datafusion/sqllogictest/test_files/select.slt | 8 +- .../sqllogictest/test_files/subquery_sort.slt | 10 +- .../sqllogictest/test_files/tpch/q1.slt.part | 4 +- .../sqllogictest/test_files/tpch/q13.slt.part | 4 +- .../sqllogictest/test_files/tpch/q16.slt.part | 4 +- .../sqllogictest/test_files/tpch/q18.slt.part | 4 +- .../sqllogictest/test_files/tpch/q2.slt.part | 4 +- .../sqllogictest/test_files/tpch/q21.slt.part | 4 +- .../sqllogictest/test_files/tpch/q3.slt.part | 4 +- .../sqllogictest/test_files/tpch/q7.slt.part | 4 +- .../sqllogictest/test_files/tpch/q9.slt.part | 4 +- datafusion/sqllogictest/test_files/window.slt | 72 +++--- 103 files changed, 1238 insertions(+), 1022 deletions(-) diff --git a/benchmarks/src/sort.rs b/benchmarks/src/sort.rs index 247727e1b484..b2038c432f77 100644 --- a/benchmarks/src/sort.rs +++ b/benchmarks/src/sort.rs @@ -22,7 +22,7 @@ use crate::util::{AccessLogOpt, BenchmarkRun, CommonOpt}; use arrow::util::pretty; use datafusion::common::Result; -use datafusion::physical_expr::PhysicalSortExpr; +use datafusion::physical_expr::{LexOrdering, LexOrderingRef, PhysicalSortExpr}; use datafusion::physical_plan::collect; use datafusion::physical_plan::sorts::sort::SortExec; use datafusion::prelude::{SessionConfig, SessionContext}; @@ -170,13 +170,13 @@ impl RunOpt { async fn exec_sort( ctx: &SessionContext, - expr: &[PhysicalSortExpr], + expr: LexOrderingRef<'_>, test_file: &TestParquetFile, debug: bool, ) -> Result<(usize, std::time::Duration)> { let start = Instant::now(); let scan = test_file.create_scan(ctx, None).await?; - let exec = Arc::new(SortExec::new(expr.to_owned(), scan)); + let exec = Arc::new(SortExec::new(LexOrdering::new(expr.to_owned()), scan)); let task_ctx = ctx.task_ctx(); let result = collect(exec, task_ctx).await?; let elapsed = start.elapsed(); diff --git a/datafusion/core/benches/physical_plan.rs b/datafusion/core/benches/physical_plan.rs index 3ad71be1f447..349c2e438195 100644 --- a/datafusion/core/benches/physical_plan.rs +++ b/datafusion/core/benches/physical_plan.rs @@ -36,6 +36,7 @@ use datafusion::physical_plan::{ memory::MemoryExec, }; use datafusion::prelude::SessionContext; +use datafusion_physical_expr_common::sort_expr::LexOrdering; // Initialise the operator using the provided record batches and the sort key // as inputs. All record batches must have the same schema. 
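// Illustrative sketch (not part of this patch): the struct also implements
// FromIterator<PhysicalSortExpr> (the benchmark hunk just below switches its
// collect target to LexOrdering), so per-column sort expressions can be gathered
// directly into a LexOrdering rather than a Vec. Schema and column names here are
// placeholders:
use arrow_schema::{Schema, SortOptions};
use datafusion_physical_expr::{expressions::col, LexOrdering, PhysicalSortExpr};

fn ordering_for(schema: &Schema, names: &[&str]) -> LexOrdering {
    names
        .iter()
        .map(|name| PhysicalSortExpr {
            expr: col(name, schema).unwrap(),
            options: SortOptions::default(),
        })
        .collect::<LexOrdering>()
}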
@@ -52,7 +53,7 @@ fn sort_preserving_merge_operator( expr: col(name, &schema).unwrap(), options: Default::default(), }) - .collect::>(); + .collect::(); let exec = MemoryExec::try_new( &batches.into_iter().map(|rb| vec![rb]).collect::>(), diff --git a/datafusion/core/benches/sort.rs b/datafusion/core/benches/sort.rs index 99a74b61b3e0..14e80ce364e3 100644 --- a/datafusion/core/benches/sort.rs +++ b/datafusion/core/benches/sort.rs @@ -89,6 +89,7 @@ use datafusion_physical_expr::{expressions::col, PhysicalSortExpr}; /// Benchmarks for SortPreservingMerge stream use criterion::{criterion_group, criterion_main, Criterion}; +use datafusion_physical_expr_common::sort_expr::LexOrdering; use futures::StreamExt; use rand::rngs::StdRng; use rand::{Rng, SeedableRng}; @@ -257,7 +258,7 @@ impl BenchCase { } /// Make sort exprs for each column in `schema` -fn make_sort_exprs(schema: &Schema) -> Vec { +fn make_sort_exprs(schema: &Schema) -> LexOrdering { schema .fields() .iter() diff --git a/datafusion/core/src/datasource/listing/table.rs b/datafusion/core/src/datasource/listing/table.rs index ea2e098ef14e..15125fe5a090 100644 --- a/datafusion/core/src/datasource/listing/table.rs +++ b/datafusion/core/src/datasource/listing/table.rs @@ -1283,13 +1283,16 @@ mod tests { // ok with one column ( vec![vec![col("string_col").sort(true, false)]], - Ok(vec![vec![PhysicalSortExpr { - expr: physical_col("string_col", &schema).unwrap(), - options: SortOptions { - descending: false, - nulls_first: false, - }, - }]]) + Ok(vec![LexOrdering { + inner: vec![PhysicalSortExpr { + expr: physical_col("string_col", &schema).unwrap(), + options: SortOptions { + descending: false, + nulls_first: false, + }, + }], + } + ]) ), // ok with two columns, different options ( @@ -1297,15 +1300,17 @@ mod tests { col("string_col").sort(true, false), col("int_col").sort(false, true), ]], - Ok(vec![vec![ - PhysicalSortExpr::new_default(physical_col("string_col", &schema).unwrap()) - .asc() - .nulls_last(), - - PhysicalSortExpr::new_default(physical_col("int_col", &schema).unwrap()) - .desc() - .nulls_first() - ]]) + Ok(vec![LexOrdering { + inner: vec![ + PhysicalSortExpr::new_default(physical_col("string_col", &schema).unwrap()) + .asc() + .nulls_last(), + PhysicalSortExpr::new_default(physical_col("int_col", &schema).unwrap()) + .desc() + .nulls_first() + ], + } + ]) ), ]; diff --git a/datafusion/core/src/datasource/mod.rs b/datafusion/core/src/datasource/mod.rs index 0ed53418fe32..ad369b75e130 100644 --- a/datafusion/core/src/datasource/mod.rs +++ b/datafusion/core/src/datasource/mod.rs @@ -62,7 +62,7 @@ fn create_ordering( for exprs in sort_order { // Construct PhysicalSortExpr objects from Expr objects: - let mut sort_exprs = vec![]; + let mut sort_exprs = LexOrdering::default(); for sort in exprs { match &sort.expr { Expr::Column(col) => match expressions::col(&col.name, schema) { diff --git a/datafusion/core/src/datasource/physical_plan/file_scan_config.rs b/datafusion/core/src/datasource/physical_plan/file_scan_config.rs index 96c0e452e29e..74ab0126a557 100644 --- a/datafusion/core/src/datasource/physical_plan/file_scan_config.rs +++ b/datafusion/core/src/datasource/physical_plan/file_scan_config.rs @@ -34,7 +34,8 @@ use arrow_array::{ArrayRef, DictionaryArray, RecordBatch, RecordBatchOptions}; use arrow_schema::{DataType, Field, Schema, SchemaRef}; use datafusion_common::stats::Precision; use datafusion_common::{exec_err, ColumnStatistics, DataFusionError, Statistics}; -use datafusion_physical_expr::{LexOrdering, 
PhysicalSortExpr}; +use datafusion_physical_expr::LexOrdering; +use datafusion_physical_expr_common::sort_expr::LexOrderingRef; use log::warn; @@ -307,7 +308,7 @@ impl FileScanConfig { pub fn split_groups_by_statistics( table_schema: &SchemaRef, file_groups: &[Vec], - sort_order: &[PhysicalSortExpr], + sort_order: LexOrderingRef, ) -> Result>> { let flattened_files = file_groups.iter().flatten().collect::>(); // First Fit: diff --git a/datafusion/core/src/datasource/physical_plan/mod.rs b/datafusion/core/src/datasource/physical_plan/mod.rs index 407a3b74f79f..9971e87282a5 100644 --- a/datafusion/core/src/datasource/physical_plan/mod.rs +++ b/datafusion/core/src/datasource/physical_plan/mod.rs @@ -65,6 +65,7 @@ use crate::{ use arrow::datatypes::{DataType, SchemaRef}; use datafusion_physical_expr::expressions::Column; use datafusion_physical_expr::PhysicalSortExpr; +use datafusion_physical_expr_common::sort_expr::LexOrdering; use futures::StreamExt; use log::debug; @@ -328,11 +329,11 @@ impl From for FileMeta { fn get_projected_output_ordering( base_config: &FileScanConfig, projected_schema: &SchemaRef, -) -> Vec> { +) -> Vec { let mut all_orderings = vec![]; for output_ordering in &base_config.output_ordering { - let mut new_ordering = vec![]; - for PhysicalSortExpr { expr, options } in output_ordering { + let mut new_ordering = LexOrdering::default(); + for PhysicalSortExpr { expr, options } in output_ordering.iter() { if let Some(col) = expr.as_any().downcast_ref::() { let name = col.name(); if let Some((idx, _)) = projected_schema.column_with_name(name) { diff --git a/datafusion/core/src/datasource/physical_plan/statistics.rs b/datafusion/core/src/datasource/physical_plan/statistics.rs index 3ca3ba89f4d9..6af153a731b0 100644 --- a/datafusion/core/src/datasource/physical_plan/statistics.rs +++ b/datafusion/core/src/datasource/physical_plan/statistics.rs @@ -26,6 +26,8 @@ use std::sync::Arc; +use crate::datasource::listing::PartitionedFile; + use arrow::{ compute::SortColumn, row::{Row, Rows}, @@ -34,8 +36,7 @@ use arrow_array::RecordBatch; use arrow_schema::SchemaRef; use datafusion_common::{DataFusionError, Result}; use datafusion_physical_expr::{expressions::Column, PhysicalSortExpr}; - -use crate::datasource::listing::PartitionedFile; +use datafusion_physical_expr_common::sort_expr::{LexOrdering, LexOrderingRef}; /// A normalized representation of file min/max statistics that allows for efficient sorting & comparison. /// The min/max values are ordered by [`Self::sort_order`]. 
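// Illustrative sketch (not part of this patch): in the hunks around here, functions
// that previously took `&[PhysicalSortExpr]` now take `LexOrderingRef`, which is
// still a borrowed slice of sort expressions, and an owned copy is made with
// `LexOrdering::from_ref` (both names appear in this diff). A minimal signature of
// that shape:
use datafusion_physical_expr_common::sort_expr::{LexOrdering, LexOrderingRef};

fn to_owned_ordering(sort_order: LexOrderingRef<'_>) -> LexOrdering {
    LexOrdering::from_ref(sort_order)
}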
@@ -43,13 +44,13 @@ use crate::datasource::listing::PartitionedFile; pub(crate) struct MinMaxStatistics { min_by_sort_order: Rows, max_by_sort_order: Rows, - sort_order: Vec, + sort_order: LexOrdering, } impl MinMaxStatistics { /// Sort order used to sort the statistics #[allow(unused)] - pub fn sort_order(&self) -> &[PhysicalSortExpr] { + pub fn sort_order(&self) -> LexOrderingRef { &self.sort_order } @@ -65,8 +66,8 @@ impl MinMaxStatistics { } pub fn new_from_files<'a>( - projected_sort_order: &[PhysicalSortExpr], // Sort order with respect to projected schema - projected_schema: &SchemaRef, // Projected schema + projected_sort_order: LexOrderingRef, // Sort order with respect to projected schema + projected_schema: &SchemaRef, // Projected schema projection: Option<&[usize]>, // Indices of projection in full table schema (None = all columns) files: impl IntoIterator, ) -> Result { @@ -166,7 +167,7 @@ impl MinMaxStatistics { } pub fn new( - sort_order: &[PhysicalSortExpr], + sort_order: LexOrderingRef, schema: &SchemaRef, min_values: RecordBatch, max_values: RecordBatch, @@ -256,7 +257,7 @@ impl MinMaxStatistics { Ok(Self { min_by_sort_order: min.map_err(|e| e.context("build min rows"))?, max_by_sort_order: max.map_err(|e| e.context("build max rows"))?, - sort_order: sort_order.to_vec(), + sort_order: LexOrdering::from_ref(sort_order), }) } @@ -277,7 +278,7 @@ impl MinMaxStatistics { } fn sort_columns_from_physical_sort_exprs( - sort_order: &[PhysicalSortExpr], + sort_order: LexOrderingRef, ) -> Option> { sort_order .iter() diff --git a/datafusion/core/src/physical_optimizer/enforce_distribution.rs b/datafusion/core/src/physical_optimizer/enforce_distribution.rs index ff8f16f4ee9c..6cd902db7244 100644 --- a/datafusion/core/src/physical_optimizer/enforce_distribution.rs +++ b/datafusion/core/src/physical_optimizer/enforce_distribution.rs @@ -52,11 +52,12 @@ use datafusion_physical_expr::utils::map_columns_before_projection; use datafusion_physical_expr::{ physical_exprs_equal, EquivalenceProperties, PhysicalExpr, PhysicalExprRef, }; +use datafusion_physical_expr_common::sort_expr::LexOrdering; +use datafusion_physical_optimizer::output_requirements::OutputRequirementExec; +use datafusion_physical_optimizer::PhysicalOptimizerRule; use datafusion_physical_plan::windows::{get_best_fitting_window, BoundedWindowAggExec}; use datafusion_physical_plan::ExecutionPlanProperties; -use datafusion_physical_optimizer::output_requirements::OutputRequirementExec; -use datafusion_physical_optimizer::PhysicalOptimizerRule; use itertools::izip; /// The `EnforceDistribution` rule ensures that distribution requirements are @@ -935,7 +936,7 @@ fn add_spm_on_top(input: DistributionContext) -> DistributionContext { let new_plan = if should_preserve_ordering { Arc::new(SortPreservingMergeExec::new( - input.plan.output_ordering().unwrap_or(&[]).to_vec(), + LexOrdering::from_ref(input.plan.output_ordering().unwrap_or(&[])), input.plan.clone(), )) as _ } else { @@ -1435,7 +1436,7 @@ pub(crate) mod tests { impl SortRequiredExec { fn new_with_requirement( input: Arc, - requirement: Vec, + requirement: LexOrdering, ) -> Self { let cache = Self::compute_properties(&input); Self { @@ -1461,11 +1462,7 @@ pub(crate) mod tests { _t: DisplayFormatType, f: &mut std::fmt::Formatter, ) -> std::fmt::Result { - write!( - f, - "SortRequiredExec: [{}]", - PhysicalSortExpr::format_list(&self.expr) - ) + write!(f, "SortRequiredExec: [{}]", self.expr) } } @@ -1495,7 +1492,9 @@ pub(crate) mod tests { if self.expr.is_empty() { 
vec![None] } else { - vec![Some(PhysicalSortRequirement::from_sort_exprs(&self.expr))] + vec![Some(PhysicalSortRequirement::from_sort_exprs( + self.expr.iter(), + ))] } } @@ -1540,7 +1539,7 @@ pub(crate) mod tests { /// create a single parquet file that is sorted pub(crate) fn parquet_exec_with_sort( - output_ordering: Vec>, + output_ordering: Vec, ) -> Arc { ParquetExec::builder( FileScanConfig::new(ObjectStoreUrl::parse("test:///").unwrap(), schema()) @@ -1556,7 +1555,7 @@ pub(crate) mod tests { /// Created a sorted parquet exec with multiple files fn parquet_exec_multiple_sorted( - output_ordering: Vec>, + output_ordering: Vec, ) -> Arc { ParquetExec::builder( FileScanConfig::new(ObjectStoreUrl::parse("test:///").unwrap(), schema()) @@ -1573,7 +1572,7 @@ pub(crate) mod tests { csv_exec_with_sort(vec![]) } - fn csv_exec_with_sort(output_ordering: Vec>) -> Arc { + fn csv_exec_with_sort(output_ordering: Vec) -> Arc { Arc::new( CsvExec::builder( FileScanConfig::new(ObjectStoreUrl::parse("test:///").unwrap(), schema()) @@ -1596,9 +1595,7 @@ pub(crate) mod tests { } // Created a sorted parquet exec with multiple files - fn csv_exec_multiple_sorted( - output_ordering: Vec>, - ) -> Arc { + fn csv_exec_multiple_sorted(output_ordering: Vec) -> Arc { Arc::new( CsvExec::builder( FileScanConfig::new(ObjectStoreUrl::parse("test:///").unwrap(), schema()) @@ -1728,7 +1725,7 @@ pub(crate) mod tests { } fn sort_exec( - sort_exprs: Vec, + sort_exprs: LexOrdering, input: Arc, preserve_partitioning: bool, ) -> Arc { @@ -1738,7 +1735,7 @@ pub(crate) mod tests { } fn sort_preserving_merge_exec( - sort_exprs: Vec, + sort_exprs: LexOrdering, input: Arc, ) -> Arc { Arc::new(SortPreservingMergeExec::new(sort_exprs, input)) @@ -3076,7 +3073,7 @@ pub(crate) mod tests { // Only two RepartitionExecs added let expected = &[ "SortMergeJoin: join_type=Inner, on=[(b3@1, b2@1), (a3@0, a2@0)]", - "SortExec: expr=[b3@1 ASC,a3@0 ASC], preserve_partitioning=[true]", + "SortExec: expr=[b3@1 ASC, a3@0 ASC], preserve_partitioning=[true]", "ProjectionExec: expr=[a1@0 as a3, b1@1 as b3]", "ProjectionExec: expr=[a1@1 as a1, b1@0 as b1]", "AggregateExec: mode=FinalPartitioned, gby=[b1@0 as b1, a1@1 as a1], aggr=[]", @@ -3084,7 +3081,7 @@ pub(crate) mod tests { "AggregateExec: mode=Partial, gby=[b@1 as b1, a@0 as a1], aggr=[]", "RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", "ParquetExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e]", - "SortExec: expr=[b2@1 ASC,a2@0 ASC], preserve_partitioning=[true]", + "SortExec: expr=[b2@1 ASC, a2@0 ASC], preserve_partitioning=[true]", "ProjectionExec: expr=[a@1 as a2, b@0 as b2]", "AggregateExec: mode=FinalPartitioned, gby=[b@0 as b, a@1 as a], aggr=[]", "RepartitionExec: partitioning=Hash([b@0, a@1], 10), input_partitions=10", @@ -3096,9 +3093,9 @@ pub(crate) mod tests { let expected_first_sort_enforcement = &[ "SortMergeJoin: join_type=Inner, on=[(b3@1, b2@1), (a3@0, a2@0)]", - "RepartitionExec: partitioning=Hash([b3@1, a3@0], 10), input_partitions=10, preserve_order=true, sort_exprs=b3@1 ASC,a3@0 ASC", + "RepartitionExec: partitioning=Hash([b3@1, a3@0], 10), input_partitions=10, preserve_order=true, sort_exprs=b3@1 ASC, a3@0 ASC", "RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - "SortExec: expr=[b3@1 ASC,a3@0 ASC], preserve_partitioning=[false]", + "SortExec: expr=[b3@1 ASC, a3@0 ASC], preserve_partitioning=[false]", "CoalescePartitionsExec", "ProjectionExec: expr=[a1@0 as a3, b1@1 as b3]", "ProjectionExec: expr=[a1@1 as a1, b1@0 
as b1]", @@ -3107,9 +3104,9 @@ pub(crate) mod tests { "AggregateExec: mode=Partial, gby=[b@1 as b1, a@0 as a1], aggr=[]", "RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", "ParquetExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e]", - "RepartitionExec: partitioning=Hash([b2@1, a2@0], 10), input_partitions=10, preserve_order=true, sort_exprs=b2@1 ASC,a2@0 ASC", + "RepartitionExec: partitioning=Hash([b2@1, a2@0], 10), input_partitions=10, preserve_order=true, sort_exprs=b2@1 ASC, a2@0 ASC", "RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - "SortExec: expr=[b2@1 ASC,a2@0 ASC], preserve_partitioning=[false]", + "SortExec: expr=[b2@1 ASC, a2@0 ASC], preserve_partitioning=[false]", "CoalescePartitionsExec", "ProjectionExec: expr=[a@1 as a2, b@0 as b2]", "AggregateExec: mode=FinalPartitioned, gby=[b@0 as b, a@1 as a], aggr=[]", @@ -3127,10 +3124,10 @@ pub(crate) mod tests { fn merge_does_not_need_sort() -> Result<()> { // see https://github.com/apache/datafusion/issues/4331 let schema = schema(); - let sort_key = vec![PhysicalSortExpr { + let sort_key = LexOrdering::new(vec![PhysicalSortExpr { expr: col("a", &schema).unwrap(), options: SortOptions::default(), - }]; + }]); // Scan some sorted parquet files let exec = parquet_exec_multiple_sorted(vec![sort_key.clone()]); @@ -3329,10 +3326,10 @@ pub(crate) mod tests { #[test] fn repartition_sorted_limit() -> Result<()> { let schema = schema(); - let sort_key = vec![PhysicalSortExpr { + let sort_key = LexOrdering::new(vec![PhysicalSortExpr { expr: col("c", &schema).unwrap(), options: SortOptions::default(), - }]; + }]); let plan = limit_exec(sort_exec(sort_key, parquet_exec(), false)); let expected = &[ @@ -3351,10 +3348,10 @@ pub(crate) mod tests { #[test] fn repartition_sorted_limit_with_filter() -> Result<()> { let schema = schema(); - let sort_key = vec![PhysicalSortExpr { + let sort_key = LexOrdering::new(vec![PhysicalSortExpr { expr: col("c", &schema).unwrap(), options: SortOptions::default(), - }]; + }]); let plan = sort_required_exec_with_req( filter_exec(sort_exec(sort_key.clone(), parquet_exec(), false)), sort_key, @@ -3430,10 +3427,10 @@ pub(crate) mod tests { fn repartition_through_sort_preserving_merge() -> Result<()> { // sort preserving merge with non-sorted input let schema = schema(); - let sort_key = vec![PhysicalSortExpr { + let sort_key = LexOrdering::new(vec![PhysicalSortExpr { expr: col("c", &schema).unwrap(), options: SortOptions::default(), - }]; + }]); let plan = sort_preserving_merge_exec(sort_key, parquet_exec()); // need resort as the data was not sorted correctly @@ -3451,10 +3448,10 @@ pub(crate) mod tests { fn repartition_ignores_sort_preserving_merge() -> Result<()> { // sort preserving merge already sorted input, let schema = schema(); - let sort_key = vec![PhysicalSortExpr { + let sort_key = LexOrdering::new(vec![PhysicalSortExpr { expr: col("c", &schema).unwrap(), options: SortOptions::default(), - }]; + }]); let plan = sort_preserving_merge_exec( sort_key.clone(), parquet_exec_multiple_sorted(vec![sort_key]), @@ -3483,10 +3480,10 @@ pub(crate) mod tests { fn repartition_ignores_sort_preserving_merge_with_union() -> Result<()> { // 2 sorted parquet files unioned (partitions are concatenated, sort is preserved) let schema = schema(); - let sort_key = vec![PhysicalSortExpr { + let sort_key = LexOrdering::new(vec![PhysicalSortExpr { expr: col("c", &schema).unwrap(), options: SortOptions::default(), - }]; + }]); let input = 
union_exec(vec![parquet_exec_with_sort(vec![sort_key.clone()]); 2]); let plan = sort_preserving_merge_exec(sort_key, input); @@ -3517,10 +3514,10 @@ pub(crate) mod tests { // SortRequired // Parquet(sorted) let schema = schema(); - let sort_key = vec![PhysicalSortExpr { + let sort_key = LexOrdering::new(vec![PhysicalSortExpr { expr: col("d", &schema).unwrap(), options: SortOptions::default(), - }]; + }]); let plan = sort_required_exec_with_req( filter_exec(parquet_exec_with_sort(vec![sort_key.clone()])), sort_key, @@ -3552,10 +3549,10 @@ pub(crate) mod tests { // Parquet(unsorted) let schema = schema(); - let sort_key = vec![PhysicalSortExpr { + let sort_key = LexOrdering::new(vec![PhysicalSortExpr { expr: col("c", &schema).unwrap(), options: SortOptions::default(), - }]; + }]); let input1 = sort_required_exec_with_req( parquet_exec_with_sort(vec![sort_key.clone()]), sort_key, @@ -3594,10 +3591,10 @@ pub(crate) mod tests { )]; // non sorted input let proj = Arc::new(ProjectionExec::try_new(proj_exprs, parquet_exec())?); - let sort_key = vec![PhysicalSortExpr { + let sort_key = LexOrdering::new(vec![PhysicalSortExpr { expr: col("sum", &proj.schema()).unwrap(), options: SortOptions::default(), - }]; + }]); let plan = sort_preserving_merge_exec(sort_key, proj); let expected = &[ @@ -3627,10 +3624,10 @@ pub(crate) mod tests { #[test] fn repartition_ignores_transitively_with_projection() -> Result<()> { let schema = schema(); - let sort_key = vec![PhysicalSortExpr { + let sort_key = LexOrdering::new(vec![PhysicalSortExpr { expr: col("c", &schema).unwrap(), options: SortOptions::default(), - }]; + }]); let alias = vec![ ("a".to_string(), "a".to_string()), ("b".to_string(), "b".to_string()), @@ -3660,10 +3657,10 @@ pub(crate) mod tests { #[test] fn repartition_transitively_past_sort_with_projection() -> Result<()> { let schema = schema(); - let sort_key = vec![PhysicalSortExpr { + let sort_key = LexOrdering::new(vec![PhysicalSortExpr { expr: col("c", &schema).unwrap(), options: SortOptions::default(), - }]; + }]); let alias = vec![ ("a".to_string(), "a".to_string()), ("b".to_string(), "b".to_string()), @@ -3693,10 +3690,10 @@ pub(crate) mod tests { #[test] fn repartition_transitively_past_sort_with_filter() -> Result<()> { let schema = schema(); - let sort_key = vec![PhysicalSortExpr { + let sort_key = LexOrdering::new(vec![PhysicalSortExpr { expr: col("a", &schema).unwrap(), options: SortOptions::default(), - }]; + }]); let plan = sort_exec(sort_key, filter_exec(parquet_exec()), false); let expected = &[ @@ -3727,10 +3724,10 @@ pub(crate) mod tests { #[cfg(feature = "parquet")] fn repartition_transitively_past_sort_with_projection_and_filter() -> Result<()> { let schema = schema(); - let sort_key = vec![PhysicalSortExpr { + let sort_key = LexOrdering::new(vec![PhysicalSortExpr { expr: col("a", &schema).unwrap(), options: SortOptions::default(), - }]; + }]); let plan = sort_exec( sort_key, projection_exec_with_alias( @@ -3797,10 +3794,10 @@ pub(crate) mod tests { #[test] fn parallelization_multiple_files() -> Result<()> { let schema = schema(); - let sort_key = vec![PhysicalSortExpr { + let sort_key = LexOrdering::new(vec![PhysicalSortExpr { expr: col("a", &schema).unwrap(), options: SortOptions::default(), - }]; + }]); let plan = filter_exec(parquet_exec_multiple_sorted(vec![sort_key.clone()])); let plan = sort_required_exec_with_req(plan, sort_key); @@ -3961,10 +3958,10 @@ pub(crate) mod tests { #[test] fn parallelization_sorted_limit() -> Result<()> { let schema = schema(); - let sort_key = 
vec![PhysicalSortExpr { + let sort_key = LexOrdering::new(vec![PhysicalSortExpr { expr: col("c", &schema).unwrap(), options: SortOptions::default(), - }]; + }]); let plan_parquet = limit_exec(sort_exec(sort_key.clone(), parquet_exec(), false)); let plan_csv = limit_exec(sort_exec(sort_key, csv_exec(), false)); @@ -3993,10 +3990,10 @@ pub(crate) mod tests { #[test] fn parallelization_limit_with_filter() -> Result<()> { let schema = schema(); - let sort_key = vec![PhysicalSortExpr { + let sort_key = LexOrdering::new(vec![PhysicalSortExpr { expr: col("c", &schema).unwrap(), options: SortOptions::default(), - }]; + }]); let plan_parquet = limit_exec(filter_exec(sort_exec( sort_key.clone(), parquet_exec(), @@ -4116,10 +4113,10 @@ pub(crate) mod tests { #[test] fn parallelization_prior_to_sort_preserving_merge() -> Result<()> { let schema = schema(); - let sort_key = vec![PhysicalSortExpr { + let sort_key = LexOrdering::new(vec![PhysicalSortExpr { expr: col("c", &schema).unwrap(), options: SortOptions::default(), - }]; + }]); // sort preserving merge already sorted input, let plan_parquet = sort_preserving_merge_exec( sort_key.clone(), @@ -4146,10 +4143,10 @@ pub(crate) mod tests { #[test] fn parallelization_sort_preserving_merge_with_union() -> Result<()> { let schema = schema(); - let sort_key = vec![PhysicalSortExpr { + let sort_key = LexOrdering::new(vec![PhysicalSortExpr { expr: col("c", &schema).unwrap(), options: SortOptions::default(), - }]; + }]); // 2 sorted parquet files unioned (partitions are concatenated, sort is preserved) let input_parquet = union_exec(vec![parquet_exec_with_sort(vec![sort_key.clone()]); 2]); @@ -4180,10 +4177,10 @@ pub(crate) mod tests { #[test] fn parallelization_does_not_benefit() -> Result<()> { let schema = schema(); - let sort_key = vec![PhysicalSortExpr { + let sort_key = LexOrdering::new(vec![PhysicalSortExpr { expr: col("c", &schema).unwrap(), options: SortOptions::default(), - }]; + }]); // SortRequired // Parquet(sorted) let plan_parquet = sort_required_exec_with_req( @@ -4214,10 +4211,10 @@ pub(crate) mod tests { fn parallelization_ignores_transitively_with_projection_parquet() -> Result<()> { // sorted input let schema = schema(); - let sort_key = vec![PhysicalSortExpr { + let sort_key = LexOrdering::new(vec![PhysicalSortExpr { expr: col("c", &schema).unwrap(), options: SortOptions::default(), - }]; + }]); //Projection(a as a2, b as b2) let alias_pairs: Vec<(String, String)> = vec![ @@ -4228,10 +4225,10 @@ pub(crate) mod tests { parquet_exec_with_sort(vec![sort_key]), alias_pairs, ); - let sort_key_after_projection = vec![PhysicalSortExpr { + let sort_key_after_projection = LexOrdering::new(vec![PhysicalSortExpr { expr: col("c2", &proj_parquet.schema()).unwrap(), options: SortOptions::default(), - }]; + }]); let plan_parquet = sort_preserving_merge_exec(sort_key_after_projection, proj_parquet); let expected = &[ @@ -4255,10 +4252,10 @@ pub(crate) mod tests { fn parallelization_ignores_transitively_with_projection_csv() -> Result<()> { // sorted input let schema = schema(); - let sort_key = vec![PhysicalSortExpr { + let sort_key = LexOrdering::new(vec![PhysicalSortExpr { expr: col("c", &schema).unwrap(), options: SortOptions::default(), - }]; + }]); //Projection(a as a2, b as b2) let alias_pairs: Vec<(String, String)> = vec![ @@ -4268,10 +4265,10 @@ pub(crate) mod tests { let proj_csv = projection_exec_with_alias(csv_exec_with_sort(vec![sort_key]), alias_pairs); - let sort_key_after_projection = vec![PhysicalSortExpr { + let sort_key_after_projection 
= LexOrdering::new(vec![PhysicalSortExpr { expr: col("c2", &proj_csv.schema()).unwrap(), options: SortOptions::default(), - }]; + }]); let plan_csv = sort_preserving_merge_exec(sort_key_after_projection, proj_csv); let expected = &[ "SortPreservingMergeExec: [c2@1 ASC]", @@ -4318,10 +4315,10 @@ pub(crate) mod tests { #[test] fn remove_unnecessary_spm_after_filter() -> Result<()> { let schema = schema(); - let sort_key = vec![PhysicalSortExpr { + let sort_key = LexOrdering::new(vec![PhysicalSortExpr { expr: col("c", &schema).unwrap(), options: SortOptions::default(), - }]; + }]); let input = parquet_exec_multiple_sorted(vec![sort_key.clone()]); let physical_plan = sort_preserving_merge_exec(sort_key, filter_exec(input)); @@ -4343,10 +4340,10 @@ pub(crate) mod tests { #[test] fn preserve_ordering_through_repartition() -> Result<()> { let schema = schema(); - let sort_key = vec![PhysicalSortExpr { + let sort_key = LexOrdering::new(vec![PhysicalSortExpr { expr: col("d", &schema).unwrap(), options: SortOptions::default(), - }]; + }]); let input = parquet_exec_multiple_sorted(vec![sort_key.clone()]); let physical_plan = sort_preserving_merge_exec(sort_key, filter_exec(input)); @@ -4366,10 +4363,10 @@ pub(crate) mod tests { #[test] fn do_not_preserve_ordering_through_repartition() -> Result<()> { let schema = schema(); - let sort_key = vec![PhysicalSortExpr { + let sort_key = LexOrdering::new(vec![PhysicalSortExpr { expr: col("a", &schema).unwrap(), options: SortOptions::default(), - }]; + }]); let input = parquet_exec_multiple_sorted(vec![sort_key.clone()]); let physical_plan = sort_preserving_merge_exec(sort_key, filter_exec(input)); @@ -4398,10 +4395,10 @@ pub(crate) mod tests { #[test] fn no_need_for_sort_after_filter() -> Result<()> { let schema = schema(); - let sort_key = vec![PhysicalSortExpr { + let sort_key = LexOrdering::new(vec![PhysicalSortExpr { expr: col("c", &schema).unwrap(), options: SortOptions::default(), - }]; + }]); let input = parquet_exec_multiple_sorted(vec![sort_key.clone()]); let physical_plan = sort_preserving_merge_exec(sort_key, filter_exec(input)); @@ -4422,16 +4419,16 @@ pub(crate) mod tests { #[test] fn do_not_preserve_ordering_through_repartition2() -> Result<()> { let schema = schema(); - let sort_key = vec![PhysicalSortExpr { + let sort_key = LexOrdering::new(vec![PhysicalSortExpr { expr: col("c", &schema).unwrap(), options: SortOptions::default(), - }]; + }]); let input = parquet_exec_multiple_sorted(vec![sort_key]); - let sort_req = vec![PhysicalSortExpr { + let sort_req = LexOrdering::new(vec![PhysicalSortExpr { expr: col("a", &schema).unwrap(), options: SortOptions::default(), - }]; + }]); let physical_plan = sort_preserving_merge_exec(sort_req, filter_exec(input)); let expected = &[ @@ -4460,10 +4457,10 @@ pub(crate) mod tests { #[test] fn do_not_preserve_ordering_through_repartition3() -> Result<()> { let schema = schema(); - let sort_key = vec![PhysicalSortExpr { + let sort_key = LexOrdering::new(vec![PhysicalSortExpr { expr: col("c", &schema).unwrap(), options: SortOptions::default(), - }]; + }]); let input = parquet_exec_multiple_sorted(vec![sort_key]); let physical_plan = filter_exec(input); @@ -4481,10 +4478,10 @@ pub(crate) mod tests { #[test] fn do_not_put_sort_when_input_is_invalid() -> Result<()> { let schema = schema(); - let sort_key = vec![PhysicalSortExpr { + let sort_key = LexOrdering::new(vec![PhysicalSortExpr { expr: col("a", &schema).unwrap(), options: SortOptions::default(), - }]; + }]); let input = parquet_exec(); let physical_plan = 
sort_required_exec_with_req(filter_exec(input), sort_key); let expected = &[ @@ -4518,10 +4515,10 @@ pub(crate) mod tests { #[test] fn put_sort_when_input_is_valid() -> Result<()> { let schema = schema(); - let sort_key = vec![PhysicalSortExpr { + let sort_key = LexOrdering::new(vec![PhysicalSortExpr { expr: col("a", &schema).unwrap(), options: SortOptions::default(), - }]; + }]); let input = parquet_exec_multiple_sorted(vec![sort_key.clone()]); let physical_plan = sort_required_exec_with_req(filter_exec(input), sort_key); @@ -4555,10 +4552,10 @@ pub(crate) mod tests { #[test] fn do_not_add_unnecessary_hash() -> Result<()> { let schema = schema(); - let sort_key = vec![PhysicalSortExpr { + let sort_key = LexOrdering::new(vec![PhysicalSortExpr { expr: col("c", &schema).unwrap(), options: SortOptions::default(), - }]; + }]); let alias = vec![("a".to_string(), "a".to_string())]; let input = parquet_exec_with_sort(vec![sort_key]); let physical_plan = aggregate_exec_with_alias(input, alias); @@ -4578,10 +4575,10 @@ pub(crate) mod tests { #[test] fn do_not_add_unnecessary_hash2() -> Result<()> { let schema = schema(); - let sort_key = vec![PhysicalSortExpr { + let sort_key = LexOrdering::new(vec![PhysicalSortExpr { expr: col("c", &schema).unwrap(), options: SortOptions::default(), - }]; + }]); let alias = vec![("a".to_string(), "a".to_string())]; let input = parquet_exec_multiple_sorted(vec![sort_key]); let aggregate = aggregate_exec_with_alias(input, alias.clone()); diff --git a/datafusion/core/src/physical_optimizer/enforce_sorting.rs b/datafusion/core/src/physical_optimizer/enforce_sorting.rs index aa28f9d6b6aa..7b111cddc6fd 100644 --- a/datafusion/core/src/physical_optimizer/enforce_sorting.rs +++ b/datafusion/core/src/physical_optimizer/enforce_sorting.rs @@ -61,13 +61,14 @@ use crate::physical_plan::{Distribution, ExecutionPlan, InputOrderMode}; use datafusion_common::plan_err; use datafusion_common::tree_node::{Transformed, TransformedResult, TreeNode}; -use datafusion_physical_expr::{Partitioning, PhysicalSortExpr, PhysicalSortRequirement}; +use datafusion_physical_expr::{Partitioning, PhysicalSortRequirement}; +use datafusion_physical_expr_common::sort_expr::{LexOrdering, LexOrderingRef}; +use datafusion_physical_optimizer::PhysicalOptimizerRule; use datafusion_physical_plan::limit::{GlobalLimitExec, LocalLimitExec}; use datafusion_physical_plan::repartition::RepartitionExec; use datafusion_physical_plan::sorts::partial_sort::PartialSortExec; use datafusion_physical_plan::ExecutionPlanProperties; -use datafusion_physical_optimizer::PhysicalOptimizerRule; use itertools::izip; /// This rule inspects [`SortExec`]'s in the given physical plan and removes the @@ -231,7 +232,7 @@ fn replace_with_partial_sort( if common_prefix_length > 0 { return Ok(Arc::new( PartialSortExec::new( - sort_plan.expr().to_vec(), + LexOrdering::new(sort_plan.expr().to_vec()), sort_plan.input().clone(), common_prefix_length, ) @@ -275,7 +276,7 @@ fn parallelize_sorts( // Take the initial sort expressions and requirements let (sort_exprs, fetch) = get_sort_exprs(&requirements.plan)?; let sort_reqs = PhysicalSortRequirement::from_sort_exprs(sort_exprs); - let sort_exprs = sort_exprs.to_vec(); + let sort_exprs = LexOrdering::new(sort_exprs.to_vec()); // If there is a connection between a `CoalescePartitionsExec` and a // global sort that satisfy the requirements (i.e. 
intermediate @@ -390,15 +391,14 @@ fn analyze_immediate_sort_removal( if let Some(sort_exec) = node.plan.as_any().downcast_ref::() { let sort_input = sort_exec.input(); // If this sort is unnecessary, we should remove it: - if sort_input - .equivalence_properties() - .ordering_satisfy(sort_exec.properties().output_ordering().unwrap_or(&[])) - { + if sort_input.equivalence_properties().ordering_satisfy( + sort_exec.properties().output_ordering().unwrap_or_default(), + ) { node.plan = if !sort_exec.preserve_partitioning() && sort_input.output_partitioning().partition_count() > 1 { // Replace the sort with a sort-preserving merge: - let expr = sort_exec.expr().to_vec(); + let expr = LexOrdering::new(sort_exec.expr().to_vec()); Arc::new(SortPreservingMergeExec::new(expr, sort_input.clone())) as _ } else { // Remove the sort: @@ -619,7 +619,10 @@ fn remove_corresponding_sort_from_sub_plan( // `SortPreservingMergeExec` instead of a `CoalescePartitionsExec`. let plan = node.plan.clone(); let plan = if let Some(ordering) = plan.output_ordering() { - Arc::new(SortPreservingMergeExec::new(ordering.to_vec(), plan)) as _ + Arc::new(SortPreservingMergeExec::new( + LexOrdering::new(ordering.to_vec()), + plan, + )) as _ } else { Arc::new(CoalescePartitionsExec::new(plan)) as _ }; @@ -629,10 +632,10 @@ fn remove_corresponding_sort_from_sub_plan( Ok(node) } -/// Converts an [ExecutionPlan] trait object to a [PhysicalSortExpr] slice when possible. +/// Converts an [ExecutionPlan] trait object to a [LexOrderingRef] when possible. fn get_sort_exprs( sort_any: &Arc, -) -> Result<(&[PhysicalSortExpr], Option)> { +) -> Result<(LexOrderingRef, Option)> { if let Some(sort_exec) = sort_any.as_any().downcast_ref::() { Ok((sort_exec.expr(), sort_exec.fetch())) } else if let Some(spm) = sort_any.as_any().downcast_ref::() @@ -645,7 +648,6 @@ fn get_sort_exprs( #[cfg(test)] mod tests { - use super::*; use crate::physical_optimizer::enforce_distribution::EnforceDistribution; use crate::physical_optimizer::test_utils::{ @@ -936,8 +938,8 @@ mod tests { "RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=10", " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", " SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]", - " SortPreservingMergeExec: [nullable_col@0 ASC,non_nullable_col@1 ASC]", - " SortExec: expr=[nullable_col@0 ASC,non_nullable_col@1 ASC], preserve_partitioning=[false]", + " SortPreservingMergeExec: [nullable_col@0 ASC, non_nullable_col@1 ASC]", + " SortExec: expr=[nullable_col@0 ASC, non_nullable_col@1 ASC], preserve_partitioning=[false]", " SortPreservingMergeExec: [non_nullable_col@1 ASC]", " SortExec: expr=[non_nullable_col@1 ASC], preserve_partitioning=[false]", " MemoryExec: partitions=1, partition_sizes=[0]", @@ -961,10 +963,10 @@ mod tests { let sort = sort_exec(sort_exprs.clone(), source); let spm = sort_preserving_merge_exec(sort_exprs, sort); - let sort_exprs = vec![ + let sort_exprs = LexOrdering::new(vec![ sort_expr("nullable_col", &schema), sort_expr("non_nullable_col", &schema), - ]; + ]); let repartition_exec = repartition_exec(spm); let sort2 = Arc::new( SortExec::new(sort_exprs.clone(), repartition_exec) @@ -979,8 +981,8 @@ mod tests { // it with a `CoalescePartitionsExec` instead of directly removing it. 
let expected_input = [ "AggregateExec: mode=Final, gby=[], aggr=[]", - " SortPreservingMergeExec: [nullable_col@0 ASC,non_nullable_col@1 ASC]", - " SortExec: expr=[nullable_col@0 ASC,non_nullable_col@1 ASC], preserve_partitioning=[true]", + " SortPreservingMergeExec: [nullable_col@0 ASC, non_nullable_col@1 ASC]", + " SortExec: expr=[nullable_col@0 ASC, non_nullable_col@1 ASC], preserve_partitioning=[true]", " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", " SortPreservingMergeExec: [non_nullable_col@1 ASC]", " SortExec: expr=[non_nullable_col@1 ASC], preserve_partitioning=[false]", @@ -1006,7 +1008,7 @@ mod tests { let source2 = repartition_exec(memory_exec(&schema)); let union = union_exec(vec![source1, source2]); - let sort_exprs = vec![sort_expr("non_nullable_col", &schema)]; + let sort_exprs = LexOrdering::new(vec![sort_expr("non_nullable_col", &schema)]); // let sort = sort_exec(sort_exprs.clone(), union); let sort = Arc::new( SortExec::new(sort_exprs.clone(), union).with_preserve_partitioning(true), @@ -1029,7 +1031,7 @@ mod tests { // When removing a `SortPreservingMergeExec`, make sure that partitioning // requirements are not violated. In some cases, we may need to replace // it with a `CoalescePartitionsExec` instead of directly removing it. - let expected_input = ["SortExec: expr=[nullable_col@0 ASC,non_nullable_col@1 ASC], preserve_partitioning=[false]", + let expected_input = ["SortExec: expr=[nullable_col@0 ASC, non_nullable_col@1 ASC], preserve_partitioning=[false]", " FilterExec: NOT non_nullable_col@1", " SortPreservingMergeExec: [non_nullable_col@1 ASC]", " SortExec: expr=[non_nullable_col@1 ASC], preserve_partitioning=[true]", @@ -1039,8 +1041,8 @@ mod tests { " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", " MemoryExec: partitions=1, partition_sizes=[0]"]; - let expected_optimized = ["SortPreservingMergeExec: [nullable_col@0 ASC,non_nullable_col@1 ASC]", - " SortExec: expr=[nullable_col@0 ASC,non_nullable_col@1 ASC], preserve_partitioning=[true]", + let expected_optimized = ["SortPreservingMergeExec: [nullable_col@0 ASC, non_nullable_col@1 ASC]", + " SortExec: expr=[nullable_col@0 ASC, non_nullable_col@1 ASC], preserve_partitioning=[true]", " FilterExec: NOT non_nullable_col@1", " UnionExec", " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", @@ -1085,8 +1087,11 @@ mod tests { let schema = create_test_schema()?; let source = memory_exec(&schema); let input = Arc::new( - SortExec::new(vec![sort_expr("non_nullable_col", &schema)], source) - .with_fetch(Some(2)), + SortExec::new( + LexOrdering::new(vec![sort_expr("non_nullable_col", &schema)]), + source, + ) + .with_fetch(Some(2)), ); let physical_plan = sort_exec( vec![ @@ -1097,12 +1102,12 @@ mod tests { ); let expected_input = [ - "SortExec: expr=[non_nullable_col@1 ASC,nullable_col@0 ASC], preserve_partitioning=[false]", + "SortExec: expr=[non_nullable_col@1 ASC, nullable_col@0 ASC], preserve_partitioning=[false]", " SortExec: TopK(fetch=2), expr=[non_nullable_col@1 ASC], preserve_partitioning=[false]", " MemoryExec: partitions=1, partition_sizes=[0]", ]; let expected_optimized = [ - "SortExec: TopK(fetch=2), expr=[non_nullable_col@1 ASC,nullable_col@0 ASC], preserve_partitioning=[false]", + "SortExec: TopK(fetch=2), expr=[non_nullable_col@1 ASC, nullable_col@0 ASC], preserve_partitioning=[false]", " MemoryExec: partitions=1, partition_sizes=[0]", ]; assert_optimized!(expected_input, expected_optimized, physical_plan, true); @@ -1115,26 +1120,29 
@@ mod tests { let schema = create_test_schema()?; let source = memory_exec(&schema); let input = Arc::new(SortExec::new( - vec![ + LexOrdering::new(vec![ sort_expr("non_nullable_col", &schema), sort_expr("nullable_col", &schema), - ], + ]), source, )); let physical_plan = Arc::new( - SortExec::new(vec![sort_expr("non_nullable_col", &schema)], input) - .with_fetch(Some(2)), + SortExec::new( + LexOrdering::new(vec![sort_expr("non_nullable_col", &schema)]), + input, + ) + .with_fetch(Some(2)), ) as Arc; let expected_input = [ "SortExec: TopK(fetch=2), expr=[non_nullable_col@1 ASC], preserve_partitioning=[false]", - " SortExec: expr=[non_nullable_col@1 ASC,nullable_col@0 ASC], preserve_partitioning=[false]", + " SortExec: expr=[non_nullable_col@1 ASC, nullable_col@0 ASC], preserve_partitioning=[false]", " MemoryExec: partitions=1, partition_sizes=[0]", ]; let expected_optimized = [ "GlobalLimitExec: skip=0, fetch=2", - " SortExec: expr=[non_nullable_col@1 ASC,nullable_col@0 ASC], preserve_partitioning=[false]", + " SortExec: expr=[non_nullable_col@1 ASC, nullable_col@0 ASC], preserve_partitioning=[false]", " MemoryExec: partitions=1, partition_sizes=[0]", ]; assert_optimized!(expected_input, expected_optimized, physical_plan, true); @@ -1147,7 +1155,7 @@ mod tests { let schema = create_test_schema()?; let source = memory_exec(&schema); let input = Arc::new(SortExec::new( - vec![sort_expr("non_nullable_col", &schema)], + LexOrdering::new(vec![sort_expr("non_nullable_col", &schema)]), source, )); let limit = Arc::new(LocalLimitExec::new(input, 2)); @@ -1160,14 +1168,14 @@ mod tests { ); let expected_input = [ - "SortExec: expr=[non_nullable_col@1 ASC,nullable_col@0 ASC], preserve_partitioning=[false]", + "SortExec: expr=[non_nullable_col@1 ASC, nullable_col@0 ASC], preserve_partitioning=[false]", " LocalLimitExec: fetch=2", " SortExec: expr=[non_nullable_col@1 ASC], preserve_partitioning=[false]", " MemoryExec: partitions=1, partition_sizes=[0]", ]; let expected_optimized = [ "LocalLimitExec: fetch=2", - " SortExec: TopK(fetch=2), expr=[non_nullable_col@1 ASC,nullable_col@0 ASC], preserve_partitioning=[false]", + " SortExec: TopK(fetch=2), expr=[non_nullable_col@1 ASC, nullable_col@0 ASC], preserve_partitioning=[false]", " MemoryExec: partitions=1, partition_sizes=[0]", ]; assert_optimized!(expected_input, expected_optimized, physical_plan, true); @@ -1181,7 +1189,7 @@ mod tests { let source = memory_exec(&schema); // let input = sort_exec(vec![sort_expr("non_nullable_col", &schema)], source); let input = Arc::new(SortExec::new( - vec![sort_expr("non_nullable_col", &schema)], + LexOrdering::new(vec![sort_expr("non_nullable_col", &schema)]), source, )); let limit = Arc::new(GlobalLimitExec::new(input, 0, Some(5))) as _; @@ -1253,24 +1261,24 @@ mod tests { let repartition = repartition_exec(union); let physical_plan = sort_preserving_merge_exec(sort_exprs, repartition); - let expected_input = ["SortPreservingMergeExec: [nullable_col@0 ASC,non_nullable_col@1 ASC]", + let expected_input = ["SortPreservingMergeExec: [nullable_col@0 ASC, non_nullable_col@1 ASC]", " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=2", " UnionExec", " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC]", " GlobalLimitExec: skip=0, fetch=100", " LocalLimitExec: fetch=100", - " SortExec: expr=[nullable_col@0 ASC,non_nullable_col@1 ASC], preserve_partitioning=[false]", + " SortExec: expr=[nullable_col@0 ASC, non_nullable_col@1 
ASC], preserve_partitioning=[false]", " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]"]; // We should keep the bottom `SortExec`. - let expected_optimized = ["SortPreservingMergeExec: [nullable_col@0 ASC,non_nullable_col@1 ASC]", - " SortExec: expr=[nullable_col@0 ASC,non_nullable_col@1 ASC], preserve_partitioning=[true]", + let expected_optimized = ["SortPreservingMergeExec: [nullable_col@0 ASC, non_nullable_col@1 ASC]", + " SortExec: expr=[nullable_col@0 ASC, non_nullable_col@1 ASC], preserve_partitioning=[true]", " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=2", " UnionExec", " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC]", " GlobalLimitExec: skip=0, fetch=100", " LocalLimitExec: fetch=100", - " SortExec: expr=[nullable_col@0 ASC,non_nullable_col@1 ASC], preserve_partitioning=[false]", + " SortExec: expr=[nullable_col@0 ASC, non_nullable_col@1 ASC], preserve_partitioning=[false]", " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]"]; assert_optimized!(expected_input, expected_optimized, physical_plan, true); @@ -1288,12 +1296,12 @@ mod tests { let sort = sort_exec(vec![sort_exprs[0].clone()], source); let physical_plan = sort_preserving_merge_exec(sort_exprs, sort); let expected_input = [ - "SortPreservingMergeExec: [nullable_col@0 ASC,non_nullable_col@1 ASC]", + "SortPreservingMergeExec: [nullable_col@0 ASC, non_nullable_col@1 ASC]", " SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]", " MemoryExec: partitions=1, partition_sizes=[0]", ]; let expected_optimized = [ - "SortExec: expr=[nullable_col@0 ASC,non_nullable_col@1 ASC], preserve_partitioning=[false]", + "SortExec: expr=[nullable_col@0 ASC, non_nullable_col@1 ASC], preserve_partitioning=[false]", " MemoryExec: partitions=1, partition_sizes=[0]", ]; assert_optimized!(expected_input, expected_optimized, physical_plan, true); @@ -1317,7 +1325,7 @@ mod tests { let expected_input = [ "SortPreservingMergeExec: [non_nullable_col@1 ASC]", " SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]", - " SortPreservingMergeExec: [nullable_col@0 ASC,non_nullable_col@1 ASC]", + " SortPreservingMergeExec: [nullable_col@0 ASC, non_nullable_col@1 ASC]", " MemoryExec: partitions=1, partition_sizes=[0]", ]; let expected_optimized = [ @@ -1409,17 +1417,17 @@ mod tests { // Input is an invalid plan. In this case rule should add required sorting in appropriate places. // First ParquetExec has output ordering(nullable_col@0 ASC). However, it doesn't satisfy the // required ordering of SortPreservingMergeExec. 
- let expected_input = ["SortPreservingMergeExec: [nullable_col@0 ASC,non_nullable_col@1 ASC]", + let expected_input = ["SortPreservingMergeExec: [nullable_col@0 ASC, non_nullable_col@1 ASC]", " UnionExec", " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC]", - " SortExec: expr=[nullable_col@0 ASC,non_nullable_col@1 ASC], preserve_partitioning=[false]", + " SortExec: expr=[nullable_col@0 ASC, non_nullable_col@1 ASC], preserve_partitioning=[false]", " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]"]; - let expected_optimized = ["SortPreservingMergeExec: [nullable_col@0 ASC,non_nullable_col@1 ASC]", + let expected_optimized = ["SortPreservingMergeExec: [nullable_col@0 ASC, non_nullable_col@1 ASC]", " UnionExec", - " SortExec: expr=[nullable_col@0 ASC,non_nullable_col@1 ASC], preserve_partitioning=[false]", + " SortExec: expr=[nullable_col@0 ASC, non_nullable_col@1 ASC], preserve_partitioning=[false]", " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC]", - " SortExec: expr=[nullable_col@0 ASC,non_nullable_col@1 ASC], preserve_partitioning=[false]", + " SortExec: expr=[nullable_col@0 ASC, non_nullable_col@1 ASC], preserve_partitioning=[false]", " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]"]; assert_optimized!(expected_input, expected_optimized, physical_plan, true); @@ -1450,7 +1458,7 @@ mod tests { // Third input to the union is not Sorted (SortExec is matches required ordering by the SortPreservingMergeExec above). let expected_input = ["SortPreservingMergeExec: [nullable_col@0 ASC]", " UnionExec", - " SortExec: expr=[nullable_col@0 ASC,non_nullable_col@1 ASC], preserve_partitioning=[false]", + " SortExec: expr=[nullable_col@0 ASC, non_nullable_col@1 ASC], preserve_partitioning=[false]", " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]", " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC]", " SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]", @@ -1490,20 +1498,20 @@ mod tests { // Should modify the plan to ensure that all three inputs to the // `UnionExec` satisfy the ordering, OR add a single sort after // the `UnionExec` (both of which are equally good for this example). 
- let expected_input = ["SortPreservingMergeExec: [nullable_col@0 ASC,non_nullable_col@1 ASC]", + let expected_input = ["SortPreservingMergeExec: [nullable_col@0 ASC, non_nullable_col@1 ASC]", " UnionExec", " SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]", " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]", " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC]", " SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]", " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]"]; - let expected_optimized = ["SortPreservingMergeExec: [nullable_col@0 ASC,non_nullable_col@1 ASC]", + let expected_optimized = ["SortPreservingMergeExec: [nullable_col@0 ASC, non_nullable_col@1 ASC]", " UnionExec", - " SortExec: expr=[nullable_col@0 ASC,non_nullable_col@1 ASC], preserve_partitioning=[false]", + " SortExec: expr=[nullable_col@0 ASC, non_nullable_col@1 ASC], preserve_partitioning=[false]", " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]", - " SortExec: expr=[nullable_col@0 ASC,non_nullable_col@1 ASC], preserve_partitioning=[false]", + " SortExec: expr=[nullable_col@0 ASC, non_nullable_col@1 ASC], preserve_partitioning=[false]", " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC]", - " SortExec: expr=[nullable_col@0 ASC,non_nullable_col@1 ASC], preserve_partitioning=[false]", + " SortExec: expr=[nullable_col@0 ASC, non_nullable_col@1 ASC], preserve_partitioning=[false]", " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]"]; assert_optimized!(expected_input, expected_optimized, physical_plan, true); @@ -1542,9 +1550,9 @@ mod tests { // fine `SortExec`s below with required `SortExec`s that are absolutely necessary. let expected_input = ["SortPreservingMergeExec: [nullable_col@0 ASC]", " UnionExec", - " SortExec: expr=[nullable_col@0 ASC,non_nullable_col@1 ASC], preserve_partitioning=[false]", + " SortExec: expr=[nullable_col@0 ASC, non_nullable_col@1 ASC], preserve_partitioning=[false]", " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]", - " SortExec: expr=[nullable_col@0 ASC,non_nullable_col@1 DESC NULLS LAST], preserve_partitioning=[false]", + " SortExec: expr=[nullable_col@0 ASC, non_nullable_col@1 DESC NULLS LAST], preserve_partitioning=[false]", " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]"]; let expected_optimized = ["SortPreservingMergeExec: [nullable_col@0 ASC]", " UnionExec", @@ -1588,7 +1596,7 @@ mod tests { " SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]", " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]", " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC]", - " SortPreservingMergeExec: [nullable_col@0 ASC,non_nullable_col@1 ASC]", + " SortPreservingMergeExec: [nullable_col@0 ASC, non_nullable_col@1 ASC]", " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]"]; // Should adjust the requirement in the third input of the union so @@ -1625,9 +1633,9 @@ mod tests { // Union has unnecessarily fine ordering below it. 
We should be able to replace them with absolutely necessary ordering. let expected_input = ["SortPreservingMergeExec: [nullable_col@0 ASC]", " UnionExec", - " SortExec: expr=[nullable_col@0 ASC,non_nullable_col@1 ASC], preserve_partitioning=[false]", + " SortExec: expr=[nullable_col@0 ASC, non_nullable_col@1 ASC], preserve_partitioning=[false]", " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]", - " SortExec: expr=[nullable_col@0 ASC,non_nullable_col@1 ASC], preserve_partitioning=[false]", + " SortExec: expr=[nullable_col@0 ASC, non_nullable_col@1 ASC], preserve_partitioning=[false]", " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]"]; // Union preserves the inputs ordering and we should not change any of the SortExecs under UnionExec let expected_output = ["SortPreservingMergeExec: [nullable_col@0 ASC]", @@ -1676,9 +1684,9 @@ mod tests { // The `UnionExec` doesn't preserve any of the inputs ordering in the // example below. let expected_input = ["UnionExec", - " SortExec: expr=[nullable_col@0 ASC,non_nullable_col@1 ASC], preserve_partitioning=[false]", + " SortExec: expr=[nullable_col@0 ASC, non_nullable_col@1 ASC], preserve_partitioning=[false]", " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]", - " SortExec: expr=[nullable_col@0 DESC NULLS LAST,non_nullable_col@1 DESC NULLS LAST], preserve_partitioning=[false]", + " SortExec: expr=[nullable_col@0 DESC NULLS LAST, non_nullable_col@1 DESC NULLS LAST], preserve_partitioning=[false]", " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]"]; // Since `UnionExec` doesn't preserve ordering in the plan above. // We shouldn't keep SortExecs in the plan. @@ -1744,10 +1752,10 @@ mod tests { async fn test_window_multi_path_sort2() -> Result<()> { let schema = create_test_schema()?; - let sort_exprs1 = vec![ + let sort_exprs1 = LexOrdering::new(vec![ sort_expr("nullable_col", &schema), sort_expr("non_nullable_col", &schema), - ]; + ]); let sort_exprs2 = vec![sort_expr("nullable_col", &schema)]; let source1 = parquet_exec_sorted(&schema, sort_exprs2.clone()); let source2 = parquet_exec_sorted(&schema, sort_exprs2.clone()); @@ -1761,11 +1769,11 @@ mod tests { // The `WindowAggExec` can get its required sorting from the leaf nodes directly. 
// The unnecessary SortExecs should be removed let expected_input = ["BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(NULL), end_bound: CurrentRow, is_causal: false }], mode=[Sorted]", - " SortPreservingMergeExec: [nullable_col@0 ASC,non_nullable_col@1 ASC]", + " SortPreservingMergeExec: [nullable_col@0 ASC, non_nullable_col@1 ASC]", " UnionExec", - " SortExec: expr=[nullable_col@0 ASC,non_nullable_col@1 ASC], preserve_partitioning=[false]", + " SortExec: expr=[nullable_col@0 ASC, non_nullable_col@1 ASC], preserve_partitioning=[false]", " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC]", - " SortExec: expr=[nullable_col@0 ASC,non_nullable_col@1 ASC], preserve_partitioning=[false]", + " SortExec: expr=[nullable_col@0 ASC, non_nullable_col@1 ASC], preserve_partitioning=[false]", " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC]"]; let expected_optimized = ["BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(NULL), end_bound: CurrentRow, is_causal: false }], mode=[Sorted]", " SortPreservingMergeExec: [nullable_col@0 ASC]", @@ -1810,11 +1818,11 @@ mod tests { // Should not change the unnecessarily fine `SortExec`s because there is `LimitExec` let expected_input = ["SortPreservingMergeExec: [nullable_col@0 ASC]", " UnionExec", - " SortExec: expr=[nullable_col@0 ASC,non_nullable_col@1 ASC], preserve_partitioning=[false]", + " SortExec: expr=[nullable_col@0 ASC, non_nullable_col@1 ASC], preserve_partitioning=[false]", " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]", " GlobalLimitExec: skip=0, fetch=100", " LocalLimitExec: fetch=100", - " SortExec: expr=[nullable_col@0 ASC,non_nullable_col@1 DESC NULLS LAST], preserve_partitioning=[false]", + " SortExec: expr=[nullable_col@0 ASC, non_nullable_col@1 DESC NULLS LAST], preserve_partitioning=[false]", " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]"]; let expected_optimized = ["SortPreservingMergeExec: [nullable_col@0 ASC]", " UnionExec", @@ -1822,7 +1830,7 @@ mod tests { " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]", " GlobalLimitExec: skip=0, fetch=100", " LocalLimitExec: fetch=100", - " SortExec: expr=[nullable_col@0 ASC,non_nullable_col@1 DESC NULLS LAST], preserve_partitioning=[false]", + " SortExec: expr=[nullable_col@0 ASC, non_nullable_col@1 DESC NULLS LAST], preserve_partitioning=[false]", " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]"]; assert_optimized!(expected_input, expected_optimized, physical_plan, true); @@ -1867,7 +1875,7 @@ mod tests { let join_plan2 = format!( " SortMergeJoin: join_type={join_type}, on=[(nullable_col@0, col_a@0)]" ); - let expected_input = ["SortPreservingMergeExec: [nullable_col@0 ASC,non_nullable_col@1 ASC]", + let expected_input = ["SortPreservingMergeExec: [nullable_col@0 ASC, non_nullable_col@1 ASC]", join_plan2.as_str(), " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]", " ParquetExec: file_groups={1 group: [[x]]}, projection=[col_a, 
col_b]"]; @@ -1879,7 +1887,7 @@ mod tests { // can push down the sort requirements and save 1 SortExec vec![ join_plan.as_str(), - " SortExec: expr=[nullable_col@0 ASC,non_nullable_col@1 ASC], preserve_partitioning=[false]", + " SortExec: expr=[nullable_col@0 ASC, non_nullable_col@1 ASC], preserve_partitioning=[false]", " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]", " SortExec: expr=[col_a@0 ASC], preserve_partitioning=[false]", " ParquetExec: file_groups={1 group: [[x]]}, projection=[col_a, col_b]", @@ -1888,7 +1896,7 @@ mod tests { _ => { // can not push down the sort requirements vec![ - "SortExec: expr=[nullable_col@0 ASC,non_nullable_col@1 ASC], preserve_partitioning=[false]", + "SortExec: expr=[nullable_col@0 ASC, non_nullable_col@1 ASC], preserve_partitioning=[false]", join_plan2.as_str(), " SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]", " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]", @@ -1938,9 +1946,9 @@ mod tests { ); let spm_plan = match join_type { JoinType::RightAnti => { - "SortPreservingMergeExec: [col_a@0 ASC,col_b@1 ASC]" + "SortPreservingMergeExec: [col_a@0 ASC, col_b@1 ASC]" } - _ => "SortPreservingMergeExec: [col_a@2 ASC,col_b@3 ASC]", + _ => "SortPreservingMergeExec: [col_a@2 ASC, col_b@3 ASC]", }; let join_plan2 = format!( " SortMergeJoin: join_type={join_type}, on=[(nullable_col@0, col_a@0)]" @@ -1956,14 +1964,14 @@ mod tests { join_plan.as_str(), " SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]", " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]", - " SortExec: expr=[col_a@0 ASC,col_b@1 ASC], preserve_partitioning=[false]", + " SortExec: expr=[col_a@0 ASC, col_b@1 ASC], preserve_partitioning=[false]", " ParquetExec: file_groups={1 group: [[x]]}, projection=[col_a, col_b]", ] } _ => { // can not push down the sort requirements for Left and Full join. 
vec![ - "SortExec: expr=[col_a@2 ASC,col_b@3 ASC], preserve_partitioning=[false]", + "SortExec: expr=[col_a@2 ASC, col_b@3 ASC], preserve_partitioning=[false]", join_plan2.as_str(), " SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]", " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]", @@ -2001,13 +2009,13 @@ mod tests { ]; let physical_plan = sort_preserving_merge_exec(sort_exprs1, join.clone()); - let expected_input = ["SortPreservingMergeExec: [col_b@3 ASC,col_a@2 ASC]", + let expected_input = ["SortPreservingMergeExec: [col_b@3 ASC, col_a@2 ASC]", " SortMergeJoin: join_type=Inner, on=[(nullable_col@0, col_a@0)]", " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]", " ParquetExec: file_groups={1 group: [[x]]}, projection=[col_a, col_b]"]; // can not push down the sort requirements, need to add SortExec - let expected_optimized = ["SortExec: expr=[col_b@3 ASC,col_a@2 ASC], preserve_partitioning=[false]", + let expected_optimized = ["SortExec: expr=[col_b@3 ASC, col_a@2 ASC], preserve_partitioning=[false]", " SortMergeJoin: join_type=Inner, on=[(nullable_col@0, col_a@0)]", " SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]", " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]", @@ -2023,13 +2031,13 @@ mod tests { ]; let physical_plan = sort_preserving_merge_exec(sort_exprs2, join); - let expected_input = ["SortPreservingMergeExec: [nullable_col@0 ASC,col_b@3 ASC,col_a@2 ASC]", + let expected_input = ["SortPreservingMergeExec: [nullable_col@0 ASC, col_b@3 ASC, col_a@2 ASC]", " SortMergeJoin: join_type=Inner, on=[(nullable_col@0, col_a@0)]", " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]", " ParquetExec: file_groups={1 group: [[x]]}, projection=[col_a, col_b]"]; // can not push down the sort requirements, need to add SortExec - let expected_optimized = ["SortExec: expr=[nullable_col@0 ASC,col_b@3 ASC,col_a@2 ASC], preserve_partitioning=[false]", + let expected_optimized = ["SortExec: expr=[nullable_col@0 ASC, col_b@3 ASC, col_a@2 ASC], preserve_partitioning=[false]", " SortMergeJoin: join_type=Inner, on=[(nullable_col@0, col_a@0)]", " SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]", " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]", @@ -2069,7 +2077,7 @@ mod tests { let expected_optimized = ["BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(NULL), end_bound: CurrentRow, is_causal: false }], mode=[Sorted]", " BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(NULL), end_bound: CurrentRow, is_causal: false }], mode=[Sorted]", " BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(NULL), end_bound: CurrentRow, is_causal: false }], mode=[Sorted]", - " SortExec: expr=[nullable_col@0 ASC,non_nullable_col@1 ASC], preserve_partitioning=[false]", + " SortExec: expr=[nullable_col@0 ASC, non_nullable_col@1 ASC], preserve_partitioning=[false]", " MemoryExec: partitions=1, 
partition_sizes=[0]"]; assert_optimized!(expected_input, expected_optimized, physical_plan, true); @@ -2124,7 +2132,7 @@ mod tests { let state = session_ctx.state(); let memory_exec = memory_exec(&schema); - let sort_exprs = vec![sort_expr("nullable_col", &schema)]; + let sort_exprs = LexOrdering::new(vec![sort_expr("nullable_col", &schema)]); let window = bounded_window_exec("nullable_col", sort_exprs.clone(), memory_exec); let repartition = repartition_exec(window); @@ -2174,7 +2182,7 @@ mod tests { let repartition = repartition_exec(source); let coalesce_partitions = Arc::new(CoalescePartitionsExec::new(repartition)); let repartition = repartition_exec(coalesce_partitions); - let sort_exprs = vec![sort_expr("nullable_col", &schema)]; + let sort_exprs = LexOrdering::new(vec![sort_expr("nullable_col", &schema)]); // Add local sort let sort = Arc::new( SortExec::new(sort_exprs.clone(), repartition) @@ -2332,11 +2340,11 @@ mod tests { let physical_plan = sort_exec(vec![sort_expr("b", &schema)], spm); let expected_input = ["SortExec: expr=[b@1 ASC], preserve_partitioning=[false]", - " SortPreservingMergeExec: [a@0 ASC,b@1 ASC]", + " SortPreservingMergeExec: [a@0 ASC, b@1 ASC]", " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", " CsvExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[a@0 ASC, b@1 ASC], has_header=false",]; let expected_optimized = ["SortExec: expr=[b@1 ASC], preserve_partitioning=[false]", - " SortPreservingMergeExec: [a@0 ASC,b@1 ASC]", + " SortPreservingMergeExec: [a@0 ASC, b@1 ASC]", " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", " CsvExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[a@0 ASC, b@1 ASC], has_header=false",]; assert_optimized!(expected_input, expected_optimized, physical_plan, false); @@ -2360,12 +2368,12 @@ mod tests { spm, ); - let expected_input = ["SortExec: expr=[a@0 ASC,b@1 ASC,c@2 ASC], preserve_partitioning=[false]", - " SortPreservingMergeExec: [a@0 ASC,b@1 ASC]", + let expected_input = ["SortExec: expr=[a@0 ASC, b@1 ASC, c@2 ASC], preserve_partitioning=[false]", + " SortPreservingMergeExec: [a@0 ASC, b@1 ASC]", " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", " CsvExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[a@0 ASC, b@1 ASC], has_header=false",]; - let expected_optimized = ["SortPreservingMergeExec: [a@0 ASC,b@1 ASC]", - " SortExec: expr=[a@0 ASC,b@1 ASC,c@2 ASC], preserve_partitioning=[true]", + let expected_optimized = ["SortPreservingMergeExec: [a@0 ASC, b@1 ASC]", + " SortExec: expr=[a@0 ASC, b@1 ASC, c@2 ASC], preserve_partitioning=[true]", " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", " CsvExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[a@0 ASC, b@1 ASC], has_header=false",]; assert_optimized!(expected_input, expected_optimized, physical_plan, false); @@ -2387,15 +2395,15 @@ mod tests { let expected_input = [ "BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(NULL), end_bound: CurrentRow, is_causal: false }], mode=[Sorted]", - " SortPreservingMergeExec: [a@0 ASC,b@1 ASC]", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=10, preserve_order=true, sort_exprs=a@0 ASC,b@1 ASC", + " SortPreservingMergeExec: [a@0 ASC, b@1 ASC]", + " 
RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=10, preserve_order=true, sort_exprs=a@0 ASC, b@1 ASC", " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " SortExec: expr=[a@0 ASC,b@1 ASC], preserve_partitioning=[false]", + " SortExec: expr=[a@0 ASC, b@1 ASC], preserve_partitioning=[false]", " CsvExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], has_header=false", ]; let expected_optimized = [ "BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(NULL), end_bound: CurrentRow, is_causal: false }], mode=[Sorted]", - " SortExec: expr=[a@0 ASC,b@1 ASC], preserve_partitioning=[false]", + " SortExec: expr=[a@0 ASC, b@1 ASC], preserve_partitioning=[false]", " CoalescePartitionsExec", " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=10", " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", @@ -2418,11 +2426,11 @@ mod tests { ); let expected_input = [ - "SortExec: expr=[a@0 ASC,c@2 ASC], preserve_partitioning=[false]", + "SortExec: expr=[a@0 ASC, c@2 ASC], preserve_partitioning=[false]", " StreamingTableExec: partition_sizes=1, projection=[a, b, c, d, e], infinite_source=true, output_ordering=[a@0 ASC]" ]; let expected_optimized = [ - "PartialSortExec: expr=[a@0 ASC,c@2 ASC], common_prefix_length=[1]", + "PartialSortExec: expr=[a@0 ASC, c@2 ASC], common_prefix_length=[1]", " StreamingTableExec: partition_sizes=1, projection=[a, b, c, d, e], infinite_source=true, output_ordering=[a@0 ASC]", ]; assert_optimized!(expected_input, expected_optimized, physical_plan, true); @@ -2445,12 +2453,12 @@ mod tests { ); let expected_input = [ - "SortExec: expr=[a@0 ASC,c@2 ASC,d@3 ASC], preserve_partitioning=[false]", + "SortExec: expr=[a@0 ASC, c@2 ASC, d@3 ASC], preserve_partitioning=[false]", " StreamingTableExec: partition_sizes=1, projection=[a, b, c, d, e], infinite_source=true, output_ordering=[a@0 ASC, c@2 ASC]" ]; // let optimized let expected_optimized = [ - "PartialSortExec: expr=[a@0 ASC,c@2 ASC,d@3 ASC], common_prefix_length=[2]", + "PartialSortExec: expr=[a@0 ASC, c@2 ASC, d@3 ASC], common_prefix_length=[2]", " StreamingTableExec: partition_sizes=1, projection=[a, b, c, d, e], infinite_source=true, output_ordering=[a@0 ASC, c@2 ASC]", ]; assert_optimized!(expected_input, expected_optimized, physical_plan, true); @@ -2472,7 +2480,7 @@ mod tests { parquet_input, ); let expected_input = [ - "SortExec: expr=[a@0 ASC,b@1 ASC,c@2 ASC], preserve_partitioning=[false]", + "SortExec: expr=[a@0 ASC, b@1 ASC, c@2 ASC], preserve_partitioning=[false]", " ParquetExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[b@1 ASC, c@2 ASC]" ]; let expected_no_change = expected_input; @@ -2495,7 +2503,7 @@ mod tests { unbounded_input, ); let expected_input = [ - "SortExec: expr=[a@0 ASC,b@1 ASC,c@2 ASC], preserve_partitioning=[false]", + "SortExec: expr=[a@0 ASC, b@1 ASC, c@2 ASC], preserve_partitioning=[false]", " StreamingTableExec: partition_sizes=1, projection=[a, b, c, d, e], infinite_source=true, output_ordering=[b@1 ASC, c@2 ASC]" ]; let expected_no_change = expected_input; @@ -2510,8 +2518,8 @@ mod tests { // SortExec: expr=[a] // MemoryExec let schema = create_test_schema3()?; - let sort_exprs_a = vec![sort_expr("a", &schema)]; - let sort_exprs_b = vec![sort_expr("b", &schema)]; + let sort_exprs_a = LexOrdering::new(vec![sort_expr("a", 
&schema)]); + let sort_exprs_b = LexOrdering::new(vec![sort_expr("b", &schema)]); let plan = memory_exec(&schema); let plan = sort_exec(sort_exprs_a.clone(), plan); let plan = RequirementsTestExec::new(plan) @@ -2540,8 +2548,9 @@ mod tests { // SortExec: expr=[a] // MemoryExec let schema = create_test_schema3()?; - let sort_exprs_a = vec![sort_expr("a", &schema)]; - let sort_exprs_ab = vec![sort_expr("a", &schema), sort_expr("b", &schema)]; + let sort_exprs_a = LexOrdering::new(vec![sort_expr("a", &schema)]); + let sort_exprs_ab = + LexOrdering::new(vec![sort_expr("a", &schema), sort_expr("b", &schema)]); let plan = memory_exec(&schema); let plan = sort_exec(sort_exprs_a.clone(), plan); let plan = RequirementsTestExec::new(plan) @@ -2551,7 +2560,7 @@ mod tests { let plan = sort_exec(sort_exprs_ab, plan); let expected_input = [ - "SortExec: expr=[a@0 ASC,b@1 ASC], preserve_partitioning=[false]", + "SortExec: expr=[a@0 ASC, b@1 ASC], preserve_partitioning=[false]", " RequiredInputOrderingExec", " SortExec: expr=[a@0 ASC], preserve_partitioning=[false]", " MemoryExec: partitions=1, partition_sizes=[0]", @@ -2559,7 +2568,7 @@ mod tests { // should able to push shorts let expected = [ "RequiredInputOrderingExec", - " SortExec: expr=[a@0 ASC,b@1 ASC], preserve_partitioning=[false]", + " SortExec: expr=[a@0 ASC, b@1 ASC], preserve_partitioning=[false]", " MemoryExec: partitions=1, partition_sizes=[0]", ]; assert_optimized!(expected_input, expected, plan, true); diff --git a/datafusion/core/src/physical_optimizer/join_selection.rs b/datafusion/core/src/physical_optimizer/join_selection.rs index 2bf706f33d60..0312e362afb1 100644 --- a/datafusion/core/src/physical_optimizer/join_selection.rs +++ b/datafusion/core/src/physical_optimizer/join_selection.rs @@ -40,7 +40,8 @@ use datafusion_common::tree_node::{Transformed, TransformedResult, TreeNode}; use datafusion_common::{internal_err, JoinSide, JoinType}; use datafusion_expr::sort_properties::SortProperties; use datafusion_physical_expr::expressions::Column; -use datafusion_physical_expr::{PhysicalExpr, PhysicalSortExpr}; +use datafusion_physical_expr::PhysicalExpr; +use datafusion_physical_expr_common::sort_expr::LexOrdering; use datafusion_physical_optimizer::PhysicalOptimizerRule; /// The [`JoinSelection`] rule tries to modify a given plan so that it can @@ -553,7 +554,7 @@ fn hash_join_convert_symmetric_subrule( // the function concludes that no specific order is required for the SymmetricHashJoinExec. This approach // ensures that the symmetric hash join operation only imposes ordering constraints when necessary, // based on the properties of the child nodes and the filter condition. 
- let determine_order = |side: JoinSide| -> Option> { + let determine_order = |side: JoinSide| -> Option { hash_join .filter() .map(|filter| { @@ -594,7 +595,7 @@ fn hash_join_convert_symmetric_subrule( JoinSide::Right => hash_join.right().output_ordering(), JoinSide::None => unreachable!(), } - .map(|p| p.to_vec()) + .map(|p| LexOrdering::new(p.to_vec())) }) .flatten() }; @@ -724,7 +725,6 @@ fn apply_subrules( #[cfg(test)] mod tests_statistical { - use super::*; use crate::{ physical_plan::{displayable, ColumnStatistics, Statistics}, diff --git a/datafusion/core/src/physical_optimizer/projection_pushdown.rs b/datafusion/core/src/physical_optimizer/projection_pushdown.rs index b4dd0a995d5f..5aecf036ce18 100644 --- a/datafusion/core/src/physical_optimizer/projection_pushdown.rs +++ b/datafusion/core/src/physical_optimizer/projection_pushdown.rs @@ -54,7 +54,7 @@ use datafusion_physical_expr::{ use datafusion_physical_plan::streaming::StreamingTableExec; use datafusion_physical_plan::union::UnionExec; -use datafusion_physical_expr_common::sort_expr::LexRequirement; +use datafusion_physical_expr_common::sort_expr::{LexOrdering, LexRequirement}; use datafusion_physical_optimizer::PhysicalOptimizerRule; use itertools::Itertools; @@ -246,7 +246,7 @@ fn try_swapping_with_streaming_table( let mut lex_orderings = vec![]; for lex_ordering in streaming_table.projected_output_ordering().into_iter() { - let mut orderings = vec![]; + let mut orderings = LexOrdering::default(); for order in lex_ordering { let Some(new_ordering) = update_expr(&order.expr, projection.expr(), false)? else { @@ -467,7 +467,7 @@ fn try_swapping_with_sort( return Ok(None); } - let mut updated_exprs = vec![]; + let mut updated_exprs = LexOrdering::default(); for sort in sort.expr() { let Some(new_expr) = update_expr(&sort.expr, projection.expr(), false)? else { return Ok(None); @@ -497,7 +497,7 @@ fn try_swapping_with_sort_preserving_merge( return Ok(None); } - let mut updated_exprs = vec![]; + let mut updated_exprs = LexOrdering::default(); for sort in spm.expr() { let Some(updated_expr) = update_expr(&sort.expr, projection.expr(), false)? 
else { @@ -915,8 +915,14 @@ fn try_swapping_with_sym_hash_join( new_filter, sym_join.join_type(), sym_join.null_equals_null(), - sym_join.right().output_ordering().map(|p| p.to_vec()), - sym_join.left().output_ordering().map(|p| p.to_vec()), + sym_join + .right() + .output_ordering() + .map(|p| LexOrdering::new(p.to_vec())), + sym_join + .left() + .output_ordering() + .map(|p| LexOrdering::new(p.to_vec())), sym_join.partition_mode(), )?))) } @@ -1863,7 +1869,7 @@ mod tests { }) as _], Some(&vec![0_usize, 2, 4, 3]), vec![ - vec![ + LexOrdering::new(vec![ PhysicalSortExpr { expr: Arc::new(Column::new("e", 2)), options: SortOptions::default(), @@ -1872,11 +1878,11 @@ mod tests { expr: Arc::new(Column::new("a", 0)), options: SortOptions::default(), }, - ], - vec![PhysicalSortExpr { + ]), + LexOrdering::new(vec![PhysicalSortExpr { expr: Arc::new(Column::new("d", 3)), options: SortOptions::default(), - }], + }]), ] .into_iter(), true, @@ -1923,7 +1929,7 @@ mod tests { assert_eq!( result.projected_output_ordering().into_iter().collect_vec(), vec![ - vec![ + LexOrdering::new(vec![ PhysicalSortExpr { expr: Arc::new(Column::new("e", 1)), options: SortOptions::default(), @@ -1932,11 +1938,11 @@ mod tests { expr: Arc::new(Column::new("a", 2)), options: SortOptions::default(), }, - ], - vec![PhysicalSortExpr { + ]), + LexOrdering::new(vec![PhysicalSortExpr { expr: Arc::new(Column::new("d", 0)), options: SortOptions::default(), - }], + }]), ] ); assert!(result.is_infinite()); @@ -2553,7 +2559,7 @@ mod tests { fn test_sort_after_projection() -> Result<()> { let csv = create_simple_csv_exec(); let sort_req: Arc = Arc::new(SortExec::new( - vec![ + LexOrdering::new(vec![ PhysicalSortExpr { expr: Arc::new(Column::new("b", 1)), options: SortOptions::default(), @@ -2566,7 +2572,7 @@ mod tests { )), options: SortOptions::default(), }, - ], + ]), csv.clone(), )); let projection: Arc = Arc::new(ProjectionExec::try_new( @@ -2581,7 +2587,7 @@ mod tests { let initial = get_plan_string(&projection); let expected_initial = [ "ProjectionExec: expr=[c@2 as c, a@0 as new_a, b@1 as b]", - " SortExec: expr=[b@1 ASC,c@2 + a@0 ASC], preserve_partitioning=[false]", + " SortExec: expr=[b@1 ASC, c@2 + a@0 ASC], preserve_partitioning=[false]", " CsvExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], has_header=false" ]; assert_eq!(initial, expected_initial); @@ -2590,7 +2596,7 @@ mod tests { ProjectionPushdown::new().optimize(projection, &ConfigOptions::new())?; let expected = [ - "SortExec: expr=[b@2 ASC,c@0 + new_a@1 ASC], preserve_partitioning=[false]", + "SortExec: expr=[b@2 ASC, c@0 + new_a@1 ASC], preserve_partitioning=[false]", " ProjectionExec: expr=[c@2 as c, a@0 as new_a, b@1 as b]", " CsvExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], has_header=false" ]; @@ -2603,7 +2609,7 @@ mod tests { fn test_sort_preserving_after_projection() -> Result<()> { let csv = create_simple_csv_exec(); let sort_req: Arc = Arc::new(SortPreservingMergeExec::new( - vec![ + LexOrdering::new(vec![ PhysicalSortExpr { expr: Arc::new(Column::new("b", 1)), options: SortOptions::default(), @@ -2616,7 +2622,7 @@ mod tests { )), options: SortOptions::default(), }, - ], + ]), csv.clone(), )); let projection: Arc = Arc::new(ProjectionExec::try_new( @@ -2631,7 +2637,7 @@ mod tests { let initial = get_plan_string(&projection); let expected_initial = [ "ProjectionExec: expr=[c@2 as c, a@0 as new_a, b@1 as b]", - " SortPreservingMergeExec: [b@1 ASC,c@2 + a@0 ASC]", + " SortPreservingMergeExec: [b@1 ASC, c@2 + a@0 ASC]", " 
CsvExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], has_header=false" ]; assert_eq!(initial, expected_initial); @@ -2640,7 +2646,7 @@ mod tests { ProjectionPushdown::new().optimize(projection, &ConfigOptions::new())?; let expected = [ - "SortPreservingMergeExec: [b@2 ASC,c@0 + new_a@1 ASC]", + "SortPreservingMergeExec: [b@2 ASC, c@0 + new_a@1 ASC]", " ProjectionExec: expr=[c@2 as c, a@0 as new_a, b@1 as b]", " CsvExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], has_header=false" ]; diff --git a/datafusion/core/src/physical_optimizer/replace_with_order_preserving_variants.rs b/datafusion/core/src/physical_optimizer/replace_with_order_preserving_variants.rs index a989be987d3d..930ce52e6fa2 100644 --- a/datafusion/core/src/physical_optimizer/replace_with_order_preserving_variants.rs +++ b/datafusion/core/src/physical_optimizer/replace_with_order_preserving_variants.rs @@ -33,6 +33,7 @@ use datafusion_physical_plan::coalesce_partitions::CoalescePartitionsExec; use datafusion_physical_plan::tree_node::PlanContext; use datafusion_physical_plan::ExecutionPlanProperties; +use datafusion_physical_expr_common::sort_expr::LexOrdering; use itertools::izip; /// For a given `plan`, this object carries the information one needs from its @@ -131,7 +132,8 @@ fn plan_with_order_preserving_variants( if let Some(ordering) = child.output_ordering().map(Vec::from) { // When the input of a `CoalescePartitionsExec` has an ordering, // replace it with a `SortPreservingMergeExec` if appropriate: - let spm = SortPreservingMergeExec::new(ordering, child.clone()); + let spm = + SortPreservingMergeExec::new(LexOrdering::new(ordering), child.clone()); sort_input.plan = Arc::new(spm) as _; sort_input.children[0].data = true; return Ok(sort_input); @@ -255,7 +257,7 @@ pub(crate) fn replace_with_order_preserving_variants( if alternate_plan .plan .equivalence_properties() - .ordering_satisfy(requirements.plan.output_ordering().unwrap_or(&[])) + .ordering_satisfy(requirements.plan.output_ordering().unwrap_or_default()) { for child in alternate_plan.children.iter_mut() { child.data = false; diff --git a/datafusion/core/src/physical_optimizer/sort_pushdown.rs b/datafusion/core/src/physical_optimizer/sort_pushdown.rs index fdbda1fe52f7..9eb200f534db 100644 --- a/datafusion/core/src/physical_optimizer/sort_pushdown.rs +++ b/datafusion/core/src/physical_optimizer/sort_pushdown.rs @@ -36,10 +36,10 @@ use datafusion_common::{plan_err, JoinSide, Result}; use datafusion_expr::JoinType; use datafusion_physical_expr::expressions::Column; use datafusion_physical_expr::utils::collect_columns; -use datafusion_physical_expr::{ - LexRequirementRef, PhysicalSortExpr, PhysicalSortRequirement, +use datafusion_physical_expr::{LexRequirementRef, PhysicalSortRequirement}; +use datafusion_physical_expr_common::sort_expr::{ + LexOrdering, LexOrderingRef, LexRequirement, }; -use datafusion_physical_expr_common::sort_expr::LexRequirement; use hashbrown::HashSet; @@ -235,7 +235,7 @@ fn pushdown_requirement_to_children( Some(JoinSide::Left) => try_pushdown_requirements_to_join( smj, parent_required, - &parent_required_expr, + parent_required_expr.as_ref(), JoinSide::Left, ), Some(JoinSide::Right) => { @@ -248,7 +248,7 @@ fn pushdown_requirement_to_children( try_pushdown_requirements_to_join( smj, parent_required, - &new_right_required_expr, + new_right_required_expr.as_ref(), JoinSide::Right, ) } @@ -277,7 +277,7 @@ fn pushdown_requirement_to_children( spm_eqs = spm_eqs.with_reorder(new_ordering); // Do not push-down 
through SortPreservingMergeExec when // ordering requirement invalidates requirement of sort preserving merge exec. - if !spm_eqs.ordering_satisfy(plan.output_ordering().unwrap_or(&[])) { + if !spm_eqs.ordering_satisfy(plan.output_ordering().unwrap_or_default()) { Ok(None) } else { // Can push-down through SortPreservingMergeExec, because parent requirement is finer @@ -344,10 +344,11 @@ fn determine_children_requirement( RequirementsCompatibility::NonCompatible } } + fn try_pushdown_requirements_to_join( smj: &SortMergeJoinExec, parent_required: LexRequirementRef, - sort_expr: &[PhysicalSortExpr], + sort_expr: LexOrderingRef, push_side: JoinSide, ) -> Result>>> { let left_eq_properties = smj.left().equivalence_properties(); @@ -355,13 +356,13 @@ fn try_pushdown_requirements_to_join( let mut smj_required_orderings = smj.required_input_ordering(); let right_requirement = smj_required_orderings.swap_remove(1); let left_requirement = smj_required_orderings.swap_remove(0); - let left_ordering = smj.left().output_ordering().unwrap_or(&[]); - let right_ordering = smj.right().output_ordering().unwrap_or(&[]); + let left_ordering = smj.left().output_ordering().unwrap_or_default(); + let right_ordering = smj.right().output_ordering().unwrap_or_default(); let (new_left_ordering, new_right_ordering) = match push_side { JoinSide::Left => { let left_eq_properties = left_eq_properties .clone() - .with_reorder(Vec::from(sort_expr)); + .with_reorder(LexOrdering::from_ref(sort_expr)); if left_eq_properties .ordering_satisfy_requirement(&left_requirement.unwrap_or_default()) { @@ -374,7 +375,7 @@ fn try_pushdown_requirements_to_join( JoinSide::Right => { let right_eq_properties = right_eq_properties .clone() - .with_reorder(Vec::from(sort_expr)); + .with_reorder(LexOrdering::from_ref(sort_expr)); if right_eq_properties .ordering_satisfy_requirement(&right_requirement.unwrap_or_default()) { @@ -418,7 +419,7 @@ fn try_pushdown_requirements_to_join( } fn expr_source_side( - required_exprs: &[PhysicalSortExpr], + required_exprs: LexOrderingRef, join_type: JoinType, left_columns_len: usize, ) -> Option { diff --git a/datafusion/core/src/physical_optimizer/test_utils.rs b/datafusion/core/src/physical_optimizer/test_utils.rs index 98f1a7c21a39..bdf16300ea87 100644 --- a/datafusion/core/src/physical_optimizer/test_utils.rs +++ b/datafusion/core/src/physical_optimizer/test_utils.rs @@ -57,7 +57,7 @@ use datafusion_physical_plan::{ use async_trait::async_trait; use datafusion_execution::{SendableRecordBatchStream, TaskContext}; use datafusion_physical_expr_common::sort_expr::{ - LexRequirement, PhysicalSortRequirement, + LexOrdering, LexRequirement, PhysicalSortRequirement, }; async fn register_current_csv( @@ -243,7 +243,7 @@ pub fn bounded_window_exec( sort_exprs: impl IntoIterator, input: Arc, ) -> Arc { - let sort_exprs: Vec<_> = sort_exprs.into_iter().collect(); + let sort_exprs: LexOrdering = sort_exprs.into_iter().collect(); let schema = input.schema(); Arc::new( @@ -253,7 +253,7 @@ pub fn bounded_window_exec( "count".to_owned(), &[col(col_name, &schema).unwrap()], &[], - &sort_exprs, + sort_exprs.as_ref(), Arc::new(WindowFrame::new(Some(false))), schema.as_ref(), false, @@ -364,7 +364,7 @@ pub fn sort_exec( /// A test [`ExecutionPlan`] whose requirements can be configured. 
#[derive(Debug)] pub struct RequirementsTestExec { - required_input_ordering: Vec, + required_input_ordering: LexOrdering, maintains_input_order: bool, input: Arc, } @@ -372,7 +372,7 @@ pub struct RequirementsTestExec { impl RequirementsTestExec { pub fn new(input: Arc) -> Self { Self { - required_input_ordering: vec![], + required_input_ordering: LexOrdering::default(), maintains_input_order: true, input, } @@ -381,7 +381,7 @@ impl RequirementsTestExec { /// sets the required input ordering pub fn with_required_input_ordering( mut self, - required_input_ordering: Vec, + required_input_ordering: LexOrdering, ) -> Self { self.required_input_ordering = required_input_ordering; self @@ -419,8 +419,9 @@ impl ExecutionPlan for RequirementsTestExec { } fn required_input_ordering(&self) -> Vec> { - let requirement = - PhysicalSortRequirement::from_sort_exprs(&self.required_input_ordering); + let requirement = PhysicalSortRequirement::from_sort_exprs( + self.required_input_ordering.as_ref().iter(), + ); vec![Some(requirement)] } diff --git a/datafusion/core/src/physical_optimizer/update_aggr_exprs.rs b/datafusion/core/src/physical_optimizer/update_aggr_exprs.rs index 26cdd65883e4..d85278556cc4 100644 --- a/datafusion/core/src/physical_optimizer/update_aggr_exprs.rs +++ b/datafusion/core/src/physical_optimizer/update_aggr_exprs.rs @@ -138,12 +138,12 @@ fn try_convert_aggregate_if_better( aggr_exprs .into_iter() .map(|aggr_expr| { - let aggr_sort_exprs = aggr_expr.order_bys().unwrap_or(&[]); + let aggr_sort_exprs = &aggr_expr.order_bys().unwrap_or_default(); let reverse_aggr_sort_exprs = reverse_order_bys(aggr_sort_exprs); let aggr_sort_reqs = - PhysicalSortRequirement::from_sort_exprs(aggr_sort_exprs); + PhysicalSortRequirement::from_sort_exprs(aggr_sort_exprs.iter()); let reverse_aggr_req = - PhysicalSortRequirement::from_sort_exprs(&reverse_aggr_sort_exprs); + PhysicalSortRequirement::from_sort_exprs(&reverse_aggr_sort_exprs.inner); // If the aggregate expression benefits from input ordering, and // there is an actual ordering enabling this, try to update the diff --git a/datafusion/core/src/physical_optimizer/utils.rs b/datafusion/core/src/physical_optimizer/utils.rs index 2c0d042281e6..8007d8cc7f00 100644 --- a/datafusion/core/src/physical_optimizer/utils.rs +++ b/datafusion/core/src/physical_optimizer/utils.rs @@ -39,7 +39,7 @@ pub fn add_sort_above( fetch: Option, ) -> PlanContext { let mut sort_expr = PhysicalSortRequirement::to_sort_exprs(sort_requirements); - sort_expr.retain(|sort_expr| { + sort_expr.inner.retain(|sort_expr| { !node .plan .equivalence_properties() diff --git a/datafusion/core/src/physical_planner.rs b/datafusion/core/src/physical_planner.rs index c16f3ad104b8..2a96a2ad111f 100644 --- a/datafusion/core/src/physical_planner.rs +++ b/datafusion/core/src/physical_planner.rs @@ -1521,7 +1521,7 @@ pub fn create_window_expr_with_name( name, &physical_args, &partition_by, - &order_by, + order_by.as_ref(), window_frame, physical_schema, ignore_nulls, @@ -1550,7 +1550,7 @@ type AggregateExprWithOptionalArgs = ( // The filter clause, if any Option>, // Ordering requirements, if any - Option>, + Option, ); /// Create an aggregate expression with a name from a logical expression @@ -1600,12 +1600,12 @@ pub fn create_aggregate_expr_with_name_and_maybe_filter( None => None, }; - let ordering_reqs: Vec = - physical_sort_exprs.clone().unwrap_or(vec![]); + let ordering_reqs: LexOrdering = + physical_sort_exprs.clone().unwrap_or_default(); let agg_expr = 
AggregateExprBuilder::new(func.to_owned(), physical_args.to_vec()) - .order_by(ordering_reqs.to_vec()) + .order_by(ordering_reqs) .schema(Arc::new(physical_input_schema.to_owned())) .alias(name) .with_ignore_nulls(ignore_nulls) @@ -1674,7 +1674,7 @@ pub fn create_physical_sort_exprs( exprs .iter() .map(|expr| create_physical_sort_expr(expr, input_dfschema, execution_props)) - .collect::>>() + .collect::>() } impl DefaultPhysicalPlanner { diff --git a/datafusion/core/tests/fuzz_cases/aggregate_fuzz.rs b/datafusion/core/tests/fuzz_cases/aggregate_fuzz.rs index d715635c5951..21f604e6c60f 100644 --- a/datafusion/core/tests/fuzz_cases/aggregate_fuzz.rs +++ b/datafusion/core/tests/fuzz_cases/aggregate_fuzz.rs @@ -39,15 +39,15 @@ use datafusion_physical_expr::PhysicalSortExpr; use datafusion_physical_plan::InputOrderMode; use test_utils::{add_empty_batches, StringBatchGenerator}; +use crate::fuzz_cases::aggregation_fuzzer::{ + AggregationFuzzerBuilder, ColumnDescr, DatasetGeneratorConfig, QueryBuilder, +}; +use datafusion_physical_expr_common::sort_expr::LexOrdering; use hashbrown::HashMap; use rand::rngs::StdRng; use rand::{Rng, SeedableRng}; use tokio::task::JoinSet; -use crate::fuzz_cases::aggregation_fuzzer::{ - AggregationFuzzerBuilder, ColumnDescr, DatasetGeneratorConfig, QueryBuilder, -}; - // ======================================================================== // The new aggregation fuzz tests based on [`AggregationFuzzer`] // ======================================================================== @@ -234,7 +234,7 @@ async fn run_aggregate_test(input1: Vec, group_by_columns: Vec<&str let schema = input1[0].schema(); let session_config = SessionConfig::new().with_batch_size(50); let ctx = SessionContext::new_with_config(session_config); - let mut sort_keys = vec![]; + let mut sort_keys = LexOrdering::default(); for ordering_col in ["a", "b", "c"] { sort_keys.push(PhysicalSortExpr { expr: col(ordering_col, &schema).unwrap(), diff --git a/datafusion/core/tests/fuzz_cases/aggregation_fuzzer/data_generator.rs b/datafusion/core/tests/fuzz_cases/aggregation_fuzzer/data_generator.rs index 4fa1b7aa263d..aafa5ed7f66b 100644 --- a/datafusion/core/tests/fuzz_cases/aggregation_fuzzer/data_generator.rs +++ b/datafusion/core/tests/fuzz_cases/aggregation_fuzzer/data_generator.rs @@ -25,6 +25,7 @@ use arrow_array::{ArrayRef, RecordBatch}; use arrow_schema::{DataType, Field, Schema}; use datafusion_common::{arrow_datafusion_err, DataFusionError, Result}; use datafusion_physical_expr::{expressions::col, PhysicalSortExpr}; +use datafusion_physical_expr_common::sort_expr::LexOrdering; use datafusion_physical_plan::sorts::sort::sort_batch; use rand::{ rngs::{StdRng, ThreadRng}, @@ -140,8 +141,8 @@ impl DatasetGenerator { let col_expr = col(key, schema)?; Ok(PhysicalSortExpr::new_default(col_expr)) }) - .collect::>>()?; - let sorted_batch = sort_batch(&base_batch, &sort_exprs, None)?; + .collect::>()?; + let sorted_batch = sort_batch(&base_batch, sort_exprs.as_ref(), None)?; let batches = stagger_batch(sorted_batch); let dataset = Dataset::new(batches, sort_keys); diff --git a/datafusion/core/tests/fuzz_cases/equivalence/ordering.rs b/datafusion/core/tests/fuzz_cases/equivalence/ordering.rs index 94157e11702c..525baadd14a5 100644 --- a/datafusion/core/tests/fuzz_cases/equivalence/ordering.rs +++ b/datafusion/core/tests/fuzz_cases/equivalence/ordering.rs @@ -25,7 +25,7 @@ use datafusion_common::{DFSchema, Result}; use datafusion_expr::{Operator, ScalarUDF}; use datafusion_physical_expr::expressions::{col, 
BinaryExpr}; use datafusion_physical_expr_common::physical_expr::PhysicalExpr; -use datafusion_physical_expr_common::sort_expr::PhysicalSortExpr; +use datafusion_physical_expr_common::sort_expr::{LexOrdering, PhysicalSortExpr}; use itertools::Itertools; use std::sync::Arc; @@ -62,7 +62,7 @@ fn test_ordering_satisfy_with_equivalence_random() -> Result<()> { expr: Arc::clone(expr), options: SORT_OPTIONS, }) - .collect::>(); + .collect::(); let expected = is_table_same_after_sort( requirement.clone(), table_data_with_properties.clone(), @@ -74,7 +74,7 @@ fn test_ordering_satisfy_with_equivalence_random() -> Result<()> { // Check whether ordering_satisfy API result and // experimental result matches. assert_eq!( - eq_properties.ordering_satisfy(&requirement), + eq_properties.ordering_satisfy(requirement.as_ref()), expected, "{}", err_msg @@ -135,7 +135,7 @@ fn test_ordering_satisfy_with_equivalence_complex_random() -> Result<()> { expr: Arc::clone(expr), options: SORT_OPTIONS, }) - .collect::>(); + .collect::(); let expected = is_table_same_after_sort( requirement.clone(), table_data_with_properties.clone(), @@ -148,7 +148,7 @@ fn test_ordering_satisfy_with_equivalence_complex_random() -> Result<()> { // experimental result matches. assert_eq!( - eq_properties.ordering_satisfy(&requirement), + eq_properties.ordering_satisfy(requirement.as_ref()), (expected | false), "{}", err_msg @@ -311,7 +311,7 @@ fn test_ordering_satisfy_with_equivalence() -> Result<()> { expr: Arc::clone(expr), options, }) - .collect::>(); + .collect::(); // Check expected result with experimental result. assert_eq!( @@ -322,7 +322,7 @@ fn test_ordering_satisfy_with_equivalence() -> Result<()> { expected ); assert_eq!( - eq_properties.ordering_satisfy(&required), + eq_properties.ordering_satisfy(required.as_ref()), expected, "{err_msg}" ); diff --git a/datafusion/core/tests/fuzz_cases/equivalence/projection.rs b/datafusion/core/tests/fuzz_cases/equivalence/projection.rs index c0c8517a612b..3df3e0348e42 100644 --- a/datafusion/core/tests/fuzz_cases/equivalence/projection.rs +++ b/datafusion/core/tests/fuzz_cases/equivalence/projection.rs @@ -25,7 +25,7 @@ use datafusion_expr::{Operator, ScalarUDF}; use datafusion_physical_expr::equivalence::ProjectionMapping; use datafusion_physical_expr::expressions::{col, BinaryExpr}; use datafusion_physical_expr_common::physical_expr::PhysicalExpr; -use datafusion_physical_expr_common::sort_expr::PhysicalSortExpr; +use datafusion_physical_expr_common::sort_expr::{LexOrdering, PhysicalSortExpr}; use itertools::Itertools; use std::sync::Arc; @@ -173,7 +173,7 @@ fn ordering_satisfy_after_projection_random() -> Result<()> { expr: Arc::clone(expr), options: SORT_OPTIONS, }) - .collect::>(); + .collect::(); let expected = is_table_same_after_sort( requirement.clone(), projected_batch.clone(), @@ -185,7 +185,7 @@ fn ordering_satisfy_after_projection_random() -> Result<()> { // Check whether ordering_satisfy API result and // experimental result matches. 
assert_eq!( - projected_eq.ordering_satisfy(&requirement), + projected_eq.ordering_satisfy(requirement.as_ref()), expected, "{}", err_msg diff --git a/datafusion/core/tests/fuzz_cases/equivalence/properties.rs b/datafusion/core/tests/fuzz_cases/equivalence/properties.rs index e704fcacc328..82586bd79eda 100644 --- a/datafusion/core/tests/fuzz_cases/equivalence/properties.rs +++ b/datafusion/core/tests/fuzz_cases/equivalence/properties.rs @@ -23,7 +23,7 @@ use datafusion_common::{DFSchema, Result}; use datafusion_expr::{Operator, ScalarUDF}; use datafusion_physical_expr::expressions::{col, BinaryExpr}; use datafusion_physical_expr_common::physical_expr::PhysicalExpr; -use datafusion_physical_expr_common::sort_expr::PhysicalSortExpr; +use datafusion_physical_expr_common::sort_expr::{LexOrdering, PhysicalSortExpr}; use itertools::Itertools; use std::sync::Arc; @@ -76,7 +76,7 @@ fn test_find_longest_permutation_random() -> Result<()> { expr: Arc::clone(&exprs[idx]), options: sort_expr.options, }) - .collect::>(); + .collect::(); assert_eq!( ordering, ordering2, "indices and lexicographical ordering do not match" diff --git a/datafusion/core/tests/fuzz_cases/equivalence/utils.rs b/datafusion/core/tests/fuzz_cases/equivalence/utils.rs index acc45fe0e591..35da8b596380 100644 --- a/datafusion/core/tests/fuzz_cases/equivalence/utils.rs +++ b/datafusion/core/tests/fuzz_cases/equivalence/utils.rs @@ -223,7 +223,7 @@ fn add_equal_conditions_test() -> Result<()> { /// If the table remains the same after sorting with the added unique column, it indicates that the table was /// already sorted according to `required_ordering` to begin with. pub fn is_table_same_after_sort( - mut required_ordering: Vec, + mut required_ordering: LexOrdering, batch: RecordBatch, ) -> Result { // Clone the original schema and columns @@ -444,7 +444,7 @@ pub fn generate_table_for_orderings( assert!(!orderings.is_empty()); // Sort the inner vectors by their lengths (longest first) - orderings.sort_by_key(|v| std::cmp::Reverse(v.len())); + orderings.sort_by_key(|v| std::cmp::Reverse(v.inner.len())); let arrays = schema .fields @@ -459,13 +459,13 @@ pub fn generate_table_for_orderings( let batch = RecordBatch::try_from_iter(arrays)?; // Sort batch according to first ordering expression - let sort_columns = get_sort_columns(&batch, &orderings[0])?; + let sort_columns = get_sort_columns(&batch, orderings[0].as_ref())?; let sort_indices = lexsort_to_indices(&sort_columns, None)?; let mut batch = take_record_batch(&batch, &sort_indices)?; // prune out rows that is invalid according to remaining orderings. for ordering in orderings.iter().skip(1) { - let sort_columns = get_sort_columns(&batch, ordering)?; + let sort_columns = get_sort_columns(&batch, ordering.as_ref())?; // Collect sort options and values into separate vectors. 
let (sort_options, sort_col_values): (Vec<_>, Vec<_>) = sort_columns @@ -495,7 +495,7 @@ pub fn generate_table_for_orderings( // Convert each tuple to PhysicalSortExpr pub fn convert_to_sort_exprs( in_data: &[(&Arc, SortOptions)], -) -> Vec { +) -> LexOrdering { in_data .iter() .map(|(expr, options)| PhysicalSortExpr { @@ -508,7 +508,7 @@ pub fn convert_to_sort_exprs( // Convert each inner tuple to PhysicalSortExpr pub fn convert_to_orderings( orderings: &[Vec<(&Arc, SortOptions)>], -) -> Vec> { +) -> Vec { orderings .iter() .map(|sort_exprs| convert_to_sort_exprs(sort_exprs)) diff --git a/datafusion/core/tests/fuzz_cases/merge_fuzz.rs b/datafusion/core/tests/fuzz_cases/merge_fuzz.rs index 4eb1070e6c85..4e895920dd3d 100644 --- a/datafusion/core/tests/fuzz_cases/merge_fuzz.rs +++ b/datafusion/core/tests/fuzz_cases/merge_fuzz.rs @@ -31,6 +31,7 @@ use datafusion::physical_plan::{ sorts::sort_preserving_merge::SortPreservingMergeExec, }; use datafusion::prelude::{SessionConfig, SessionContext}; +use datafusion_physical_expr_common::sort_expr::LexOrdering; use test_utils::{batches_to_vec, partitions_to_sorted_vec, stagger_batch_with_seed}; #[tokio::test] @@ -107,13 +108,13 @@ async fn run_merge_test(input: Vec>) { .expect("at least one batch"); let schema = first_batch.schema(); - let sort = vec![PhysicalSortExpr { + let sort = LexOrdering::new(vec![PhysicalSortExpr { expr: col("x", &schema).unwrap(), options: SortOptions { descending: false, nulls_first: true, }, - }]; + }]); let exec = MemoryExec::try_new(&input, schema, None).unwrap(); let merge = Arc::new(SortPreservingMergeExec::new(sort, Arc::new(exec))); diff --git a/datafusion/core/tests/fuzz_cases/sort_fuzz.rs b/datafusion/core/tests/fuzz_cases/sort_fuzz.rs index 4ba06ef1d2a6..e4acb96f4930 100644 --- a/datafusion/core/tests/fuzz_cases/sort_fuzz.rs +++ b/datafusion/core/tests/fuzz_cases/sort_fuzz.rs @@ -30,6 +30,7 @@ use datafusion::physical_plan::{collect, ExecutionPlan}; use datafusion::prelude::{SessionConfig, SessionContext}; use datafusion_execution::memory_pool::GreedyMemoryPool; use datafusion_physical_expr::expressions::col; +use datafusion_physical_expr_common::sort_expr::LexOrdering; use rand::Rng; use std::sync::Arc; use test_utils::{batches_to_vec, partitions_to_sorted_vec}; @@ -114,13 +115,13 @@ impl SortTest { .expect("at least one batch"); let schema = first_batch.schema(); - let sort = vec![PhysicalSortExpr { + let sort = LexOrdering::new(vec![PhysicalSortExpr { expr: col("x", &schema).unwrap(), options: SortOptions { descending: false, nulls_first: true, }, - }]; + }]); let exec = MemoryExec::try_new(&input, schema, None).unwrap(); let sort = Arc::new(SortExec::new(sort, Arc::new(exec))); diff --git a/datafusion/core/tests/fuzz_cases/sort_preserving_repartition_fuzz.rs b/datafusion/core/tests/fuzz_cases/sort_preserving_repartition_fuzz.rs index 353db8668363..73f4a569954e 100644 --- a/datafusion/core/tests/fuzz_cases/sort_preserving_repartition_fuzz.rs +++ b/datafusion/core/tests/fuzz_cases/sort_preserving_repartition_fuzz.rs @@ -45,6 +45,7 @@ mod sp_repartition_fuzz_tests { }; use test_utils::add_empty_batches; + use datafusion_physical_expr_common::sort_expr::LexOrdering; use itertools::izip; use rand::{rngs::StdRng, seq::SliceRandom, Rng, SeedableRng}; @@ -345,7 +346,7 @@ mod sp_repartition_fuzz_tests { let schema = input1[0].schema(); let session_config = SessionConfig::new().with_batch_size(50); let ctx = SessionContext::new_with_config(session_config); - let mut sort_keys = vec![]; + let mut sort_keys = 
LexOrdering::default(); for ordering_col in ["a", "b", "c"] { sort_keys.push(PhysicalSortExpr { expr: col(ordering_col, &schema).unwrap(), diff --git a/datafusion/core/tests/fuzz_cases/window_fuzz.rs b/datafusion/core/tests/fuzz_cases/window_fuzz.rs index 61b4e32ad6c9..5bfb4d97ed70 100644 --- a/datafusion/core/tests/fuzz_cases/window_fuzz.rs +++ b/datafusion/core/tests/fuzz_cases/window_fuzz.rs @@ -47,6 +47,7 @@ use test_utils::add_empty_batches; use datafusion::functions_window::row_number::row_number_udwf; use datafusion_functions_window::lead_lag::{lag_udwf, lead_udwf}; use datafusion_functions_window::rank::{dense_rank_udwf, rank_udwf}; +use datafusion_physical_expr_common::sort_expr::LexOrdering; use hashbrown::HashMap; use rand::distributions::Alphanumeric; use rand::rngs::StdRng; @@ -251,7 +252,7 @@ async fn bounded_window_causal_non_causal() -> Result<()> { ]; let partitionby_exprs = vec![]; - let orderby_exprs = vec![]; + let orderby_exprs = LexOrdering::default(); // Window frame starts with "UNBOUNDED PRECEDING": let start_bound = WindowFrameBound::Preceding(ScalarValue::UInt64(None)); @@ -284,7 +285,7 @@ async fn bounded_window_causal_non_causal() -> Result<()> { fn_name.to_string(), &args, &partitionby_exprs, - &orderby_exprs, + orderby_exprs.as_ref(), Arc::new(window_frame), &extended_schema, false, @@ -599,7 +600,7 @@ async fn run_window_test( let ctx = SessionContext::new_with_config(session_config); let (window_fn, args, fn_name) = get_random_function(&schema, &mut rng, is_linear); let window_frame = get_random_window_frame(&mut rng, is_linear); - let mut orderby_exprs = vec![]; + let mut orderby_exprs = LexOrdering::default(); for column in &orderby_columns { orderby_exprs.push(PhysicalSortExpr { expr: col(column, &schema)?, @@ -607,27 +608,27 @@ async fn run_window_test( }) } if orderby_exprs.len() > 1 && !window_frame.can_accept_multi_orderby() { - orderby_exprs = orderby_exprs[0..1].to_vec(); + orderby_exprs = LexOrdering::new(orderby_exprs[0..1].to_vec()); } let mut partitionby_exprs = vec![]; for column in &partition_by_columns { partitionby_exprs.push(col(column, &schema)?); } - let mut sort_keys = vec![]; + let mut sort_keys = LexOrdering::default(); for partition_by_expr in &partitionby_exprs { sort_keys.push(PhysicalSortExpr { expr: partition_by_expr.clone(), options: SortOptions::default(), }) } - for order_by_expr in &orderby_exprs { + for order_by_expr in &orderby_exprs.inner { if !sort_keys.contains(order_by_expr) { sort_keys.push(order_by_expr.clone()) } } let concat_input_record = concat_batches(&schema, &input1)?; - let source_sort_keys = vec![ + let source_sort_keys = LexOrdering::new(vec![ PhysicalSortExpr { expr: col("a", &schema)?, options: Default::default(), @@ -640,7 +641,7 @@ async fn run_window_test( expr: col("c", &schema)?, options: Default::default(), }, - ]; + ]); let mut exec1 = Arc::new( MemoryExec::try_new(&[vec![concat_input_record]], schema.clone(), None)? 
.try_with_sort_information(vec![source_sort_keys.clone()])?, @@ -659,7 +660,7 @@ async fn run_window_test( fn_name.clone(), &args, &partitionby_exprs, - &orderby_exprs, + orderby_exprs.as_ref(), Arc::new(window_frame.clone()), &extended_schema, false, @@ -677,7 +678,7 @@ async fn run_window_test( fn_name, &args, &partitionby_exprs, - &orderby_exprs, + orderby_exprs.as_ref(), Arc::new(window_frame.clone()), &extended_schema, false, diff --git a/datafusion/core/tests/memory_limit/mod.rs b/datafusion/core/tests/memory_limit/mod.rs index fc2fb9afb5f9..6817969580da 100644 --- a/datafusion/core/tests/memory_limit/mod.rs +++ b/datafusion/core/tests/memory_limit/mod.rs @@ -238,15 +238,15 @@ async fn sort_preserving_merge() { // SortPreservingMergeExec (not a Sort which would compete // with the SortPreservingMergeExec for memory) &[ - "+---------------+-----------------------------------------------------------------------------------------------------------+", - "| plan_type | plan |", - "+---------------+-----------------------------------------------------------------------------------------------------------+", - "| logical_plan | Sort: t.a ASC NULLS LAST, t.b ASC NULLS LAST, fetch=10 |", - "| | TableScan: t projection=[a, b] |", - "| physical_plan | SortPreservingMergeExec: [a@0 ASC NULLS LAST,b@1 ASC NULLS LAST], fetch=10 |", - "| | MemoryExec: partitions=2, partition_sizes=[5, 5], output_ordering=a@0 ASC NULLS LAST,b@1 ASC NULLS LAST |", - "| | |", - "+---------------+-----------------------------------------------------------------------------------------------------------+", + "+---------------+------------------------------------------------------------------------------------------------------------+", + "| plan_type | plan |", + "+---------------+------------------------------------------------------------------------------------------------------------+", + "| logical_plan | Sort: t.a ASC NULLS LAST, t.b ASC NULLS LAST, fetch=10 |", + "| | TableScan: t projection=[a, b] |", + "| physical_plan | SortPreservingMergeExec: [a@0 ASC NULLS LAST, b@1 ASC NULLS LAST], fetch=10 |", + "| | MemoryExec: partitions=2, partition_sizes=[5, 5], output_ordering=a@0 ASC NULLS LAST, b@1 ASC NULLS LAST |", + "| | |", + "+---------------+------------------------------------------------------------------------------------------------------------+", ] ) .run() @@ -281,15 +281,15 @@ async fn sort_spill_reservation() { // also merge, so we can ensure the sort could finish // given enough merging memory &[ - "+---------------+--------------------------------------------------------------------------------------------------------+", - "| plan_type | plan |", - "+---------------+--------------------------------------------------------------------------------------------------------+", - "| logical_plan | Sort: t.a ASC NULLS LAST, t.b DESC NULLS FIRST |", - "| | TableScan: t projection=[a, b] |", - "| physical_plan | SortExec: expr=[a@0 ASC NULLS LAST,b@1 DESC], preserve_partitioning=[false] |", - "| | MemoryExec: partitions=1, partition_sizes=[5], output_ordering=a@0 ASC NULLS LAST,b@1 ASC NULLS LAST |", - "| | |", - "+---------------+--------------------------------------------------------------------------------------------------------+", + "+---------------+---------------------------------------------------------------------------------------------------------+", + "| plan_type | plan |", + 
"+---------------+---------------------------------------------------------------------------------------------------------+", + "| logical_plan | Sort: t.a ASC NULLS LAST, t.b DESC NULLS FIRST |", + "| | TableScan: t projection=[a, b] |", + "| physical_plan | SortExec: expr=[a@0 ASC NULLS LAST, b@1 DESC], preserve_partitioning=[false] |", + "| | MemoryExec: partitions=1, partition_sizes=[5], output_ordering=a@0 ASC NULLS LAST, b@1 ASC NULLS LAST |", + "| | |", + "+---------------+---------------------------------------------------------------------------------------------------------+", ] ); @@ -654,7 +654,7 @@ impl Scenario { descending: false, nulls_first: false, }; - let sort_information = vec![vec![ + let sort_information = vec![LexOrdering::new(vec![ PhysicalSortExpr { expr: col("a", &schema).unwrap(), options, @@ -663,7 +663,7 @@ impl Scenario { expr: col("b", &schema).unwrap(), options, }, - ]]; + ])]; let table = SortedTableProvider::new(batches, sort_information); Arc::new(table) diff --git a/datafusion/core/tests/physical_optimizer/limited_distinct_aggregation.rs b/datafusion/core/tests/physical_optimizer/limited_distinct_aggregation.rs index 6859e2f1468c..6910db6285a3 100644 --- a/datafusion/core/tests/physical_optimizer/limited_distinct_aggregation.rs +++ b/datafusion/core/tests/physical_optimizer/limited_distinct_aggregation.rs @@ -37,6 +37,7 @@ use datafusion_physical_expr::{ expressions::{cast, col}, PhysicalExpr, PhysicalSortExpr, }; +use datafusion_physical_expr_common::sort_expr::LexOrdering; use datafusion_physical_optimizer::{ limited_distinct_aggregation::LimitedDistinctAggregation, PhysicalOptimizerRule, }; @@ -407,10 +408,10 @@ fn test_has_filter() -> Result<()> { #[test] fn test_has_order_by() -> Result<()> { - let sort_key = vec![PhysicalSortExpr { + let sort_key = LexOrdering::new(vec![PhysicalSortExpr { expr: col("a", &schema()).unwrap(), options: SortOptions::default(), - }]; + }]); let source = parquet_exec_with_sort(vec![sort_key]); let schema = source.schema(); diff --git a/datafusion/core/tests/physical_optimizer/test_util.rs b/datafusion/core/tests/physical_optimizer/test_util.rs index 131b887c4ec7..12cd08fb3db3 100644 --- a/datafusion/core/tests/physical_optimizer/test_util.rs +++ b/datafusion/core/tests/physical_optimizer/test_util.rs @@ -25,11 +25,11 @@ use datafusion::datasource::{ physical_plan::{FileScanConfig, ParquetExec}, }; use datafusion_execution::object_store::ObjectStoreUrl; -use datafusion_physical_expr::PhysicalSortExpr; +use datafusion_physical_expr_common::sort_expr::LexOrdering; /// create a single parquet file that is sorted pub(crate) fn parquet_exec_with_sort( - output_ordering: Vec>, + output_ordering: Vec, ) -> Arc { ParquetExec::builder( FileScanConfig::new(ObjectStoreUrl::parse("test:///").unwrap(), schema()) diff --git a/datafusion/functions-aggregate-common/src/accumulator.rs b/datafusion/functions-aggregate-common/src/accumulator.rs index ddf0085b9de4..67ada562800b 100644 --- a/datafusion/functions-aggregate-common/src/accumulator.rs +++ b/datafusion/functions-aggregate-common/src/accumulator.rs @@ -18,9 +18,8 @@ use arrow::datatypes::{DataType, Field, Schema}; use datafusion_common::Result; use datafusion_expr_common::accumulator::Accumulator; -use datafusion_physical_expr_common::{ - physical_expr::PhysicalExpr, sort_expr::PhysicalSortExpr, -}; +use datafusion_physical_expr_common::physical_expr::PhysicalExpr; +use datafusion_physical_expr_common::sort_expr::LexOrderingRef; use std::sync::Arc; /// [`AccumulatorArgs`] contains 
information about how an aggregate @@ -53,7 +52,7 @@ pub struct AccumulatorArgs<'a> { /// ``` /// /// If no `ORDER BY` is specified, `ordering_req` will be empty. - pub ordering_req: &'a [PhysicalSortExpr], + pub ordering_req: LexOrderingRef<'a>, /// Whether the aggregation is running in reverse order pub is_reversed: bool, diff --git a/datafusion/functions-aggregate-common/src/utils.rs b/datafusion/functions-aggregate-common/src/utils.rs index 4fba772d8ddc..f55e5ec9a41d 100644 --- a/datafusion/functions-aggregate-common/src/utils.rs +++ b/datafusion/functions-aggregate-common/src/utils.rs @@ -30,7 +30,7 @@ use arrow::{ }; use datafusion_common::{exec_err, DataFusionError, Result}; use datafusion_expr_common::accumulator::Accumulator; -use datafusion_physical_expr_common::sort_expr::PhysicalSortExpr; +use datafusion_physical_expr_common::sort_expr::LexOrderingRef; /// Convert scalar values from an accumulator into arrays. pub fn get_accum_scalar_values_as_arrays( @@ -88,7 +88,7 @@ pub fn adjust_output_array(data_type: &DataType, array: ArrayRef) -> Result Vec { @@ -107,7 +107,7 @@ pub fn ordering_fields( } /// Selects the sort option attribute from all the given `PhysicalSortExpr`s. -pub fn get_sort_options(ordering_req: &[PhysicalSortExpr]) -> Vec { +pub fn get_sort_options(ordering_req: LexOrderingRef) -> Vec { ordering_req.iter().map(|item| item.options).collect() } diff --git a/datafusion/functions-aggregate/benches/count.rs b/datafusion/functions-aggregate/benches/count.rs index 65956cb8a1de..1c8266ed5b89 100644 --- a/datafusion/functions-aggregate/benches/count.rs +++ b/datafusion/functions-aggregate/benches/count.rs @@ -23,6 +23,7 @@ use criterion::{black_box, criterion_group, criterion_main, Criterion}; use datafusion_expr::{function::AccumulatorArgs, AggregateUDFImpl, GroupsAccumulator}; use datafusion_functions_aggregate::count::Count; use datafusion_physical_expr::expressions::col; +use datafusion_physical_expr_common::sort_expr::LexOrderingRef; use std::sync::Arc; fn prepare_accumulator() -> Box { @@ -31,7 +32,7 @@ fn prepare_accumulator() -> Box { return_type: &DataType::Int64, schema: &schema, ignore_nulls: false, - ordering_req: &[], + ordering_req: LexOrderingRef::default(), is_reversed: false, name: "COUNT(f)", is_distinct: false, diff --git a/datafusion/functions-aggregate/benches/sum.rs b/datafusion/functions-aggregate/benches/sum.rs index 652d447129dc..1e9493280ed2 100644 --- a/datafusion/functions-aggregate/benches/sum.rs +++ b/datafusion/functions-aggregate/benches/sum.rs @@ -23,6 +23,7 @@ use criterion::{black_box, criterion_group, criterion_main, Criterion}; use datafusion_expr::{function::AccumulatorArgs, AggregateUDFImpl, GroupsAccumulator}; use datafusion_functions_aggregate::sum::Sum; use datafusion_physical_expr::expressions::col; +use datafusion_physical_expr_common::sort_expr::LexOrderingRef; use std::sync::Arc; fn prepare_accumulator(data_type: &DataType) -> Box { @@ -31,7 +32,7 @@ fn prepare_accumulator(data_type: &DataType) -> Box { return_type: data_type, schema: &schema, ignore_nulls: false, - ordering_req: &[], + ordering_req: LexOrderingRef::default(), is_reversed: false, name: "SUM(f)", is_distinct: false, diff --git a/datafusion/functions-aggregate/src/array_agg.rs b/datafusion/functions-aggregate/src/array_agg.rs index b3e04c5584ef..7c22c21e38c9 100644 --- a/datafusion/functions-aggregate/src/array_agg.rs +++ b/datafusion/functions-aggregate/src/array_agg.rs @@ -135,7 +135,7 @@ impl AggregateUDFImpl for ArrayAgg { 
OrderSensitiveArrayAggAccumulator::try_new( &data_type, &ordering_dtypes, - acc_args.ordering_req.to_vec(), + LexOrdering::from_ref(acc_args.ordering_req), acc_args.is_reversed, ) .map(|acc| Box::new(acc) as _) @@ -511,7 +511,7 @@ impl Accumulator for OrderSensitiveArrayAggAccumulator { impl OrderSensitiveArrayAggAccumulator { fn evaluate_orderings(&self) -> Result { - let fields = ordering_fields(&self.ordering_req, &self.datatypes[1..]); + let fields = ordering_fields(self.ordering_req.as_ref(), &self.datatypes[1..]); let num_columns = fields.len(); let struct_field = Fields::from(fields.clone()); diff --git a/datafusion/functions-aggregate/src/first_last.rs b/datafusion/functions-aggregate/src/first_last.rs index da3fc62f8c8c..0b05713499a9 100644 --- a/datafusion/functions-aggregate/src/first_last.rs +++ b/datafusion/functions-aggregate/src/first_last.rs @@ -37,7 +37,7 @@ use datafusion_expr::{ ExprFunctionExt, Signature, SortExpr, TypeSignature, Volatility, }; use datafusion_functions_aggregate_common::utils::get_sort_options; -use datafusion_physical_expr_common::sort_expr::{LexOrdering, PhysicalSortExpr}; +use datafusion_physical_expr_common::sort_expr::{LexOrdering, LexOrderingRef}; create_func!(FirstValue, first_value_udaf); @@ -130,7 +130,7 @@ impl AggregateUDFImpl for FirstValue { FirstValueAccumulator::try_new( acc_args.return_type, &ordering_dtypes, - acc_args.ordering_req.to_vec(), + LexOrdering::from_ref(acc_args.ordering_req), acc_args.ignore_nulls, ) .map(|acc| Box::new(acc.with_requirement_satisfied(requirement_satisfied)) as _) @@ -315,7 +315,7 @@ impl Accumulator for FirstValueAccumulator { if compare_rows( &self.orderings, orderings, - &get_sort_options(&self.ordering_req), + &get_sort_options(self.ordering_req.as_ref()), )? .is_gt() { @@ -333,8 +333,10 @@ impl Accumulator for FirstValueAccumulator { let flags = states[is_set_idx].as_boolean(); let filtered_states = filter_states_according_to_is_set(states, flags)?; // 1..is_set_idx range corresponds to ordering section - let sort_cols = - convert_to_sort_cols(&filtered_states[1..is_set_idx], &self.ordering_req); + let sort_cols = convert_to_sort_cols( + &filtered_states[1..is_set_idx], + self.ordering_req.as_ref(), + ); let ordered_states = if sort_cols.is_empty() { // When no ordering is given, use the existing state as is: @@ -347,7 +349,7 @@ impl Accumulator for FirstValueAccumulator { let first_row = get_row_at_idx(&ordered_states, 0)?; // When collecting orderings, we exclude the is_set flag from the state. let first_ordering = &first_row[1..is_set_idx]; - let sort_options = get_sort_options(&self.ordering_req); + let sort_options = get_sort_options(self.ordering_req.as_ref()); // Either there is no existing value, or there is an earlier version in new data. if !self.is_set || compare_rows(&self.orderings, first_ordering, &sort_options)?.is_gt() @@ -453,7 +455,7 @@ impl AggregateUDFImpl for LastValue { LastValueAccumulator::try_new( acc_args.return_type, &ordering_dtypes, - acc_args.ordering_req.to_vec(), + LexOrdering::from_ref(acc_args.ordering_req), acc_args.ignore_nulls, ) .map(|acc| Box::new(acc.with_requirement_satisfied(requirement_satisfied)) as _) @@ -645,7 +647,7 @@ impl Accumulator for LastValueAccumulator { if compare_rows( &self.orderings, orderings, - &get_sort_options(&self.ordering_req), + &get_sort_options(self.ordering_req.as_ref()), )? 
.is_lt() { @@ -663,8 +665,10 @@ impl Accumulator for LastValueAccumulator { let flags = states[is_set_idx].as_boolean(); let filtered_states = filter_states_according_to_is_set(states, flags)?; // 1..is_set_idx range corresponds to ordering section - let sort_cols = - convert_to_sort_cols(&filtered_states[1..is_set_idx], &self.ordering_req); + let sort_cols = convert_to_sort_cols( + &filtered_states[1..is_set_idx], + self.ordering_req.as_ref(), + ); let ordered_states = if sort_cols.is_empty() { // When no ordering is given, use existing state as is: @@ -679,7 +683,7 @@ impl Accumulator for LastValueAccumulator { let last_row = get_row_at_idx(&ordered_states, last_idx)?; // When collecting orderings, we exclude the is_set flag from the state. let last_ordering = &last_row[1..is_set_idx]; - let sort_options = get_sort_options(&self.ordering_req); + let sort_options = get_sort_options(self.ordering_req.as_ref()); // Either there is no existing value, or there is a newer (latest) // version in the new data: if !self.is_set @@ -721,7 +725,7 @@ fn filter_states_according_to_is_set( /// Combines array refs and their corresponding orderings to construct `SortColumn`s. fn convert_to_sort_cols( arrs: &[ArrayRef], - sort_exprs: &[PhysicalSortExpr], + sort_exprs: LexOrderingRef, ) -> Vec { arrs.iter() .zip(sort_exprs.iter()) @@ -740,10 +744,18 @@ mod tests { #[test] fn test_first_last_value_value() -> Result<()> { - let mut first_accumulator = - FirstValueAccumulator::try_new(&DataType::Int64, &[], vec![], false)?; - let mut last_accumulator = - LastValueAccumulator::try_new(&DataType::Int64, &[], vec![], false)?; + let mut first_accumulator = FirstValueAccumulator::try_new( + &DataType::Int64, + &[], + LexOrdering::default(), + false, + )?; + let mut last_accumulator = LastValueAccumulator::try_new( + &DataType::Int64, + &[], + LexOrdering::default(), + false, + )?; // first value in the tuple is start of the range (inclusive), // second value in the tuple is end of the range (exclusive) let ranges: Vec<(i64, i64)> = vec![(0, 10), (1, 11), (2, 13)]; @@ -780,14 +792,22 @@ mod tests { .collect::>(); // FirstValueAccumulator - let mut first_accumulator = - FirstValueAccumulator::try_new(&DataType::Int64, &[], vec![], false)?; + let mut first_accumulator = FirstValueAccumulator::try_new( + &DataType::Int64, + &[], + LexOrdering::default(), + false, + )?; first_accumulator.update_batch(&[Arc::clone(&arrs[0])])?; let state1 = first_accumulator.state()?; - let mut first_accumulator = - FirstValueAccumulator::try_new(&DataType::Int64, &[], vec![], false)?; + let mut first_accumulator = FirstValueAccumulator::try_new( + &DataType::Int64, + &[], + LexOrdering::default(), + false, + )?; first_accumulator.update_batch(&[Arc::clone(&arrs[1])])?; let state2 = first_accumulator.state()?; @@ -802,22 +822,34 @@ mod tests { ])?); } - let mut first_accumulator = - FirstValueAccumulator::try_new(&DataType::Int64, &[], vec![], false)?; + let mut first_accumulator = FirstValueAccumulator::try_new( + &DataType::Int64, + &[], + LexOrdering::default(), + false, + )?; first_accumulator.merge_batch(&states)?; let merged_state = first_accumulator.state()?; assert_eq!(merged_state.len(), state1.len()); // LastValueAccumulator - let mut last_accumulator = - LastValueAccumulator::try_new(&DataType::Int64, &[], vec![], false)?; + let mut last_accumulator = LastValueAccumulator::try_new( + &DataType::Int64, + &[], + LexOrdering::default(), + false, + )?; last_accumulator.update_batch(&[Arc::clone(&arrs[0])])?; let state1 = 
last_accumulator.state()?; - let mut last_accumulator = - LastValueAccumulator::try_new(&DataType::Int64, &[], vec![], false)?; + let mut last_accumulator = LastValueAccumulator::try_new( + &DataType::Int64, + &[], + LexOrdering::default(), + false, + )?; last_accumulator.update_batch(&[Arc::clone(&arrs[1])])?; let state2 = last_accumulator.state()?; @@ -832,8 +864,12 @@ mod tests { ])?); } - let mut last_accumulator = - LastValueAccumulator::try_new(&DataType::Int64, &[], vec![], false)?; + let mut last_accumulator = LastValueAccumulator::try_new( + &DataType::Int64, + &[], + LexOrdering::default(), + false, + )?; last_accumulator.merge_batch(&states)?; let merged_state = last_accumulator.state()?; diff --git a/datafusion/functions-aggregate/src/nth_value.rs b/datafusion/functions-aggregate/src/nth_value.rs index 2a1778d8b232..5f3a8cf2f161 100644 --- a/datafusion/functions-aggregate/src/nth_value.rs +++ b/datafusion/functions-aggregate/src/nth_value.rs @@ -133,7 +133,7 @@ impl AggregateUDFImpl for NthValueAgg { n, &data_type, &ordering_dtypes, - acc_args.ordering_req.to_vec(), + LexOrdering::from_ref(acc_args.ordering_req), ) .map(|acc| Box::new(acc) as _) } @@ -403,7 +403,7 @@ impl Accumulator for NthValueAccumulator { impl NthValueAccumulator { fn evaluate_orderings(&self) -> Result { - let fields = ordering_fields(&self.ordering_req, &self.datatypes[1..]); + let fields = ordering_fields(self.ordering_req.as_ref(), &self.datatypes[1..]); let struct_field = Fields::from(fields.clone()); let mut column_wise_ordering_values = vec![]; diff --git a/datafusion/functions-aggregate/src/stddev.rs b/datafusion/functions-aggregate/src/stddev.rs index 355d1d5ad2db..95269ed8217c 100644 --- a/datafusion/functions-aggregate/src/stddev.rs +++ b/datafusion/functions-aggregate/src/stddev.rs @@ -410,6 +410,7 @@ mod tests { use datafusion_expr::AggregateUDF; use datafusion_functions_aggregate_common::utils::get_accum_scalar_values_as_arrays; use datafusion_physical_expr::expressions::col; + use datafusion_physical_expr_common::sort_expr::LexOrderingRef; use std::sync::Arc; #[test] @@ -461,7 +462,7 @@ mod tests { return_type: &DataType::Float64, schema, ignore_nulls: false, - ordering_req: &[], + ordering_req: LexOrderingRef::default(), name: "a", is_distinct: false, is_reversed: false, @@ -472,7 +473,7 @@ mod tests { return_type: &DataType::Float64, schema, ignore_nulls: false, - ordering_req: &[], + ordering_req: LexOrderingRef::default(), name: "a", is_distinct: false, is_reversed: false, diff --git a/datafusion/physical-expr-common/src/sort_expr.rs b/datafusion/physical-expr-common/src/sort_expr.rs index d825bfe7e264..addf2fbfca0c 100644 --- a/datafusion/physical-expr-common/src/sort_expr.rs +++ b/datafusion/physical-expr-common/src/sort_expr.rs @@ -17,12 +17,13 @@ //! 
Sort expressions +use crate::physical_expr::PhysicalExpr; +use std::fmt; use std::fmt::{Display, Formatter}; use std::hash::{Hash, Hasher}; -use std::ops::Deref; +use std::ops::{Deref, Index, Range, RangeFrom, RangeTo}; use std::sync::Arc; - -use crate::physical_expr::PhysicalExpr; +use std::vec::IntoIter; use arrow::compute::kernels::sort::{SortColumn, SortOptions}; use arrow::datatypes::Schema; @@ -143,7 +144,7 @@ impl Hash for PhysicalSortExpr { } impl Display for PhysicalSortExpr { - fn fmt(&self, f: &mut Formatter) -> std::fmt::Result { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { write!(f, "{} {}", self.expr, to_str(&self.options)) } } @@ -183,26 +184,6 @@ impl PhysicalSortExpr { .map_or(true, |opts| self.options.descending == opts.descending) } } - - /// Returns a [`Display`]able list of `PhysicalSortExpr`. - pub fn format_list(input: &[PhysicalSortExpr]) -> impl Display + '_ { - struct DisplayableList<'a>(&'a [PhysicalSortExpr]); - impl<'a> Display for DisplayableList<'a> { - fn fmt(&self, f: &mut Formatter) -> std::fmt::Result { - let mut first = true; - for sort_expr in self.0 { - if first { - first = false; - } else { - write!(f, ",")?; - } - write!(f, "{}", sort_expr)?; - } - Ok(()) - } - } - DisplayableList(input) - } } /// Represents sort requirement associated with a plan @@ -260,7 +241,7 @@ impl PartialEq for PhysicalSortRequirement { } impl Display for PhysicalSortRequirement { - fn fmt(&self, f: &mut Formatter) -> std::fmt::Result { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { let opts_string = self.options.as_ref().map_or("NA", to_str); write!(f, "{} {}", self.expr, opts_string) } @@ -274,7 +255,7 @@ pub fn format_physical_sort_requirement_list( ) -> impl Display + '_ { struct DisplayWrapper<'a>(&'a [PhysicalSortRequirement]); impl<'a> Display for DisplayWrapper<'a> { - fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { let mut iter = self.0.iter(); write!(f, "[")?; if let Some(expr) = iter.next() { @@ -345,7 +326,7 @@ impl PhysicalSortRequirement { /// default ordering `ASC, NULLS LAST` if given (see the `PhysicalSortExpr::from`). pub fn to_sort_exprs( requirements: impl IntoIterator, - ) -> Vec { + ) -> LexOrdering { requirements .into_iter() .map(PhysicalSortExpr::from) @@ -364,9 +345,147 @@ fn to_str(options: &SortOptions) -> &str { } } -///`LexOrdering` is an alias for the type `Vec`, which represents +///`LexOrdering` contains a `Vec`, which represents /// a lexicographical ordering. 
-pub type LexOrdering = Vec; +#[derive(Debug, Default, Clone, PartialEq, Eq, Hash)] +pub struct LexOrdering { + pub inner: Vec, +} + +impl LexOrdering { + // Creates a new [`LexOrdering`] from a vector + pub fn new(inner: Vec) -> Self { + Self { inner } + } + + pub fn as_ref(&self) -> LexOrderingRef { + &self.inner + } + + pub fn capacity(&self) -> usize { + self.inner.capacity() + } + + pub fn clear(&mut self) { + self.inner.clear() + } + + pub fn contains(&self, expr: &PhysicalSortExpr) -> bool { + self.inner.contains(expr) + } + + pub fn extend>(&mut self, iter: I) { + self.inner.extend(iter) + } + + pub fn from_ref(lex_ordering_ref: LexOrderingRef) -> Self { + Self::new(lex_ordering_ref.to_vec()) + } + + pub fn is_empty(&self) -> bool { + self.inner.is_empty() + } + + pub fn iter(&self) -> impl Iterator { + self.inner.iter() + } + + pub fn len(&self) -> usize { + self.inner.len() + } + + pub fn pop(&mut self) -> Option { + self.inner.pop() + } + + pub fn push(&mut self, physical_sort_expr: PhysicalSortExpr) { + self.inner.push(physical_sort_expr) + } + + pub fn retain(&mut self, f: impl FnMut(&PhysicalSortExpr) -> bool) { + self.inner.retain(f) + } + + pub fn truncate(&mut self, len: usize) { + self.inner.truncate(len) + } +} + +impl Deref for LexOrdering { + type Target = [PhysicalSortExpr]; + + fn deref(&self) -> &Self::Target { + self.inner.as_slice() + } +} + +impl Display for LexOrdering { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + let mut first = true; + for sort_expr in &self.inner { + if first { + first = false; + } else { + write!(f, ", ")?; + } + write!(f, "{}", sort_expr)?; + } + Ok(()) + } +} + +impl FromIterator for LexOrdering { + fn from_iter>(iter: T) -> Self { + let mut lex_ordering = LexOrdering::default(); + + for i in iter { + lex_ordering.push(i); + } + + lex_ordering + } +} + +impl Index for LexOrdering { + type Output = PhysicalSortExpr; + + fn index(&self, index: usize) -> &Self::Output { + &self.inner[index] + } +} + +impl Index> for LexOrdering { + type Output = [PhysicalSortExpr]; + + fn index(&self, range: Range) -> &Self::Output { + &self.inner[range] + } +} + +impl Index> for LexOrdering { + type Output = [PhysicalSortExpr]; + + fn index(&self, range_from: RangeFrom) -> &Self::Output { + &self.inner[range_from] + } +} + +impl Index> for LexOrdering { + type Output = [PhysicalSortExpr]; + + fn index(&self, range_to: RangeTo) -> &Self::Output { + &self.inner[range_to] + } +} + +impl IntoIterator for LexOrdering { + type Item = PhysicalSortExpr; + type IntoIter = IntoIter; + + fn into_iter(self) -> Self::IntoIter { + self.inner.into_iter() + } +} ///`LexOrderingRef` is an alias for the type &`[PhysicalSortExpr]`, which represents /// a reference to a lexicographical ordering. 
@@ -384,6 +503,10 @@ impl LexRequirement { Self { inner } } + pub fn is_empty(&self) -> bool { + self.inner.is_empty() + } + pub fn iter(&self) -> impl Iterator { self.inner.iter() } @@ -415,7 +538,7 @@ impl FromIterator for LexRequirement { impl IntoIterator for LexRequirement { type Item = PhysicalSortRequirement; - type IntoIter = std::vec::IntoIter; + type IntoIter = IntoIter; fn into_iter(self) -> Self::IntoIter { self.inner.into_iter() diff --git a/datafusion/physical-expr-common/src/utils.rs b/datafusion/physical-expr-common/src/utils.rs index d2c9bf1a2408..26293b1a76a2 100644 --- a/datafusion/physical-expr-common/src/utils.rs +++ b/datafusion/physical-expr-common/src/utils.rs @@ -24,7 +24,7 @@ use datafusion_common::Result; use datafusion_expr_common::sort_properties::ExprProperties; use crate::physical_expr::PhysicalExpr; -use crate::sort_expr::PhysicalSortExpr; +use crate::sort_expr::{LexOrdering, LexOrderingRef, PhysicalSortExpr}; use crate::tree_node::ExprContext; /// Represents a [`PhysicalExpr`] node with associated properties (order and @@ -96,7 +96,7 @@ pub fn scatter(mask: &BooleanArray, truthy: &dyn Array) -> Result { /// Reverses the ORDER BY expression, which is useful during equivalent window /// expression construction. For instance, 'ORDER BY a ASC, NULLS LAST' turns into /// 'ORDER BY a DESC, NULLS FIRST'. -pub fn reverse_order_bys(order_bys: &[PhysicalSortExpr]) -> Vec { +pub fn reverse_order_bys(order_bys: LexOrderingRef) -> LexOrdering { order_bys .iter() .map(|e| PhysicalSortExpr::new(e.expr.clone(), !e.options)) diff --git a/datafusion/physical-expr/src/aggregate.rs b/datafusion/physical-expr/src/aggregate.rs index 6330c240241a..e446776affc0 100644 --- a/datafusion/physical-expr/src/aggregate.rs +++ b/datafusion/physical-expr/src/aggregate.rs @@ -45,7 +45,7 @@ use datafusion_functions_aggregate_common::accumulator::AccumulatorArgs; use datafusion_functions_aggregate_common::accumulator::StateFieldsArgs; use datafusion_functions_aggregate_common::order::AggregateOrderSensitivity; use datafusion_physical_expr_common::physical_expr::PhysicalExpr; -use datafusion_physical_expr_common::sort_expr::{LexOrdering, PhysicalSortExpr}; +use datafusion_physical_expr_common::sort_expr::{LexOrdering, LexOrderingRef}; use datafusion_physical_expr_common::utils::reverse_order_bys; use datafusion_expr_common::groups_accumulator::GroupsAccumulator; @@ -81,7 +81,7 @@ impl AggregateExprBuilder { args, alias: None, schema: Arc::new(Schema::empty()), - ordering_req: vec![], + ordering_req: LexOrdering::default(), ignore_nulls: false, is_distinct: false, is_reversed: false, @@ -111,7 +111,8 @@ impl AggregateExprBuilder { .map(|e| e.expr.data_type(&schema)) .collect::>>()?; - ordering_fields = utils::ordering_fields(&ordering_req, &ordering_types); + ordering_fields = + utils::ordering_fields(ordering_req.as_ref(), &ordering_types); } let input_exprs_types = args @@ -265,7 +266,7 @@ impl AggregateFunctionExpr { return_type: &self.data_type, schema: &self.schema, ignore_nulls: self.ignore_nulls, - ordering_req: &self.ordering_req, + ordering_req: self.ordering_req.as_ref(), is_distinct: self.is_distinct, name: &self.name, is_reversed: self.is_reversed, @@ -291,13 +292,13 @@ impl AggregateFunctionExpr { /// Order by requirements for the aggregate function /// By default it is `None` (there is no requirement) /// Order-sensitive aggregators, such as `FIRST_VALUE(x ORDER BY y)` should implement this - pub fn order_bys(&self) -> Option<&[PhysicalSortExpr]> { + pub fn order_bys(&self) -> 
Option { if self.ordering_req.is_empty() { return None; } if !self.order_sensitivity().is_insensitive() { - return Some(&self.ordering_req); + return Some(self.ordering_req.as_ref()); } None @@ -340,7 +341,7 @@ impl AggregateFunctionExpr { }; AggregateExprBuilder::new(Arc::new(updated_fn), self.args.to_vec()) - .order_by(self.ordering_req.to_vec()) + .order_by(self.ordering_req.clone()) .schema(Arc::new(self.schema.clone())) .alias(self.name().to_string()) .with_ignore_nulls(self.ignore_nulls) @@ -356,7 +357,7 @@ impl AggregateFunctionExpr { return_type: &self.data_type, schema: &self.schema, ignore_nulls: self.ignore_nulls, - ordering_req: &self.ordering_req, + ordering_req: self.ordering_req.as_ref(), is_distinct: self.is_distinct, name: &self.name, is_reversed: self.is_reversed, @@ -425,7 +426,7 @@ impl AggregateFunctionExpr { return_type: &self.data_type, schema: &self.schema, ignore_nulls: self.ignore_nulls, - ordering_req: &self.ordering_req, + ordering_req: self.ordering_req.as_ref(), is_distinct: self.is_distinct, name: &self.name, is_reversed: self.is_reversed, @@ -444,7 +445,7 @@ impl AggregateFunctionExpr { return_type: &self.data_type, schema: &self.schema, ignore_nulls: self.ignore_nulls, - ordering_req: &self.ordering_req, + ordering_req: self.ordering_req.as_ref(), is_distinct: self.is_distinct, name: &self.name, is_reversed: self.is_reversed, @@ -462,7 +463,7 @@ impl AggregateFunctionExpr { ReversedUDAF::NotSupported => None, ReversedUDAF::Identical => Some(self.clone()), ReversedUDAF::Reversed(reverse_udf) => { - let reverse_ordering_req = reverse_order_bys(&self.ordering_req); + let reverse_ordering_req = reverse_order_bys(self.ordering_req.as_ref()); let mut name = self.name().to_string(); // If the function is changed, we need to reverse order_by clause as well // i.e. First(a order by b asc null first) -> Last(a order by b desc null last) @@ -473,7 +474,7 @@ impl AggregateFunctionExpr { replace_fn_name_clause(&mut name, self.fun.name(), reverse_udf.name()); AggregateExprBuilder::new(reverse_udf, self.args.to_vec()) - .order_by(reverse_ordering_req.to_vec()) + .order_by(reverse_ordering_req) .schema(Arc::new(self.schema.clone())) .alias(name) .with_ignore_nulls(self.ignore_nulls) @@ -489,7 +490,7 @@ impl AggregateFunctionExpr { /// These expressions are (1)function arguments, (2) order by expressions. 
pub fn all_expressions(&self) -> AggregatePhysicalExpressions { let args = self.expressions(); - let order_bys = self.order_bys().unwrap_or(&[]); + let order_bys = self.order_bys().unwrap_or_default(); let order_by_exprs = order_bys .iter() .map(|sort_expr| Arc::clone(&sort_expr.expr)) diff --git a/datafusion/physical-expr/src/equivalence/mod.rs b/datafusion/physical-expr/src/equivalence/mod.rs index 95bb93d6ca57..902e53a7f236 100644 --- a/datafusion/physical-expr/src/equivalence/mod.rs +++ b/datafusion/physical-expr/src/equivalence/mod.rs @@ -80,6 +80,7 @@ mod tests { use arrow::datatypes::{DataType, Field, Schema}; use arrow_schema::{SchemaRef, SortOptions}; use datafusion_common::{plan_datafusion_err, Result}; + use datafusion_physical_expr_common::sort_expr::LexOrdering; pub fn output_schema( mapping: &ProjectionMapping, @@ -184,7 +185,7 @@ mod tests { // Convert each tuple to PhysicalSortExpr pub fn convert_to_sort_exprs( in_data: &[(&Arc, SortOptions)], - ) -> Vec { + ) -> LexOrdering { in_data .iter() .map(|(expr, options)| PhysicalSortExpr { @@ -197,7 +198,7 @@ mod tests { // Convert each inner tuple to PhysicalSortExpr pub fn convert_to_orderings( orderings: &[Vec<(&Arc, SortOptions)>], - ) -> Vec> { + ) -> Vec { orderings .iter() .map(|sort_exprs| convert_to_sort_exprs(sort_exprs)) @@ -207,20 +208,22 @@ mod tests { // Convert each tuple to PhysicalSortExpr pub fn convert_to_sort_exprs_owned( in_data: &[(Arc, SortOptions)], - ) -> Vec { - in_data - .iter() - .map(|(expr, options)| PhysicalSortExpr { - expr: Arc::clone(expr), - options: *options, - }) - .collect() + ) -> LexOrdering { + LexOrdering::new( + in_data + .iter() + .map(|(expr, options)| PhysicalSortExpr { + expr: Arc::clone(expr), + options: *options, + }) + .collect(), + ) } // Convert each inner tuple to PhysicalSortExpr pub fn convert_to_orderings_owned( orderings: &[Vec<(Arc, SortOptions)>], - ) -> Vec> { + ) -> Vec { orderings .iter() .map(|sort_exprs| convert_to_sort_exprs_owned(sort_exprs)) diff --git a/datafusion/physical-expr/src/equivalence/ordering.rs b/datafusion/physical-expr/src/equivalence/ordering.rs index d71f3b037fb1..838c9800f942 100644 --- a/datafusion/physical-expr/src/equivalence/ordering.rs +++ b/datafusion/physical-expr/src/equivalence/ordering.rs @@ -21,7 +21,7 @@ use std::sync::Arc; use std::vec::IntoIter; use crate::equivalence::add_offset_to_expr; -use crate::{LexOrdering, PhysicalExpr, PhysicalSortExpr}; +use crate::{LexOrdering, PhysicalExpr}; use arrow_schema::SortOptions; /// An `OrderingEquivalenceClass` object keeps track of different alternative @@ -146,7 +146,12 @@ impl OrderingEquivalenceClass { /// Returns the concatenation of all the orderings. This enables merge /// operations to preserve all equivalent orderings simultaneously. pub fn output_ordering(&self) -> Option { - let output_ordering = self.orderings.iter().flatten().cloned().collect(); + let output_ordering = self + .orderings + .iter() + .flat_map(|ordering| ordering.as_ref()) + .cloned() + .collect(); let output_ordering = collapse_lex_ordering(output_ordering); (!output_ordering.is_empty()).then_some(output_ordering) } @@ -169,7 +174,7 @@ impl OrderingEquivalenceClass { for idx in 0..n_ordering { // Calculate cross product index let idx = outer_idx * n_ordering + idx; - self.orderings[idx].extend(ordering.iter().cloned()); + self.orderings[idx].inner.extend(ordering.iter().cloned()); } } self @@ -179,7 +184,7 @@ impl OrderingEquivalenceClass { /// ordering equivalence class. 
pub fn add_offset(&mut self, offset: usize) { for ordering in self.orderings.iter_mut() { - for sort_expr in ordering { + for sort_expr in ordering.inner.iter_mut() { sort_expr.expr = add_offset_to_expr(Arc::clone(&sort_expr.expr), offset); } } @@ -211,10 +216,10 @@ impl IntoIterator for OrderingEquivalenceClass { /// duplicate entries that have same physical expression inside. For example, /// `vec![a ASC, a DESC]` collapses to `vec![a ASC]`. pub fn collapse_lex_ordering(input: LexOrdering) -> LexOrdering { - let mut output = Vec::::new(); - for item in input { + let mut output = LexOrdering::default(); + for item in input.iter() { if !output.iter().any(|req| req.expr.eq(&item.expr)) { - output.push(item); + output.push(item.clone()); } } output @@ -239,10 +244,10 @@ impl Display for OrderingEquivalenceClass { write!(f, "[")?; let mut iter = self.orderings.iter(); if let Some(ordering) = iter.next() { - write!(f, "[{}]", PhysicalSortExpr::format_list(ordering))?; + write!(f, "[{}]", ordering)?; } for ordering in iter { - write!(f, ", [{}]", PhysicalSortExpr::format_list(ordering))?; + write!(f, ", [{}]", ordering)?; } write!(f, "]")?; Ok(()) @@ -268,6 +273,7 @@ mod tests { use arrow_schema::SortOptions; use datafusion_common::{DFSchema, Result}; use datafusion_expr::{Operator, ScalarUDF}; + use datafusion_physical_expr_common::sort_expr::LexOrdering; #[test] fn test_ordering_satisfy() -> Result<()> { @@ -275,11 +281,11 @@ mod tests { Field::new("a", DataType::Int64, true), Field::new("b", DataType::Int64, true), ])); - let crude = vec![PhysicalSortExpr { + let crude = LexOrdering::new(vec![PhysicalSortExpr { expr: Arc::new(Column::new("a", 0)), options: SortOptions::default(), - }]; - let finer = vec![ + }]); + let finer = LexOrdering::new(vec![ PhysicalSortExpr { expr: Arc::new(Column::new("a", 0)), options: SortOptions::default(), @@ -288,18 +294,18 @@ mod tests { expr: Arc::new(Column::new("b", 1)), options: SortOptions::default(), }, - ]; + ]); // finer ordering satisfies, crude ordering should return true let mut eq_properties_finer = EquivalenceProperties::new(Arc::clone(&input_schema)); eq_properties_finer.oeq_class.push(finer.clone()); - assert!(eq_properties_finer.ordering_satisfy(&crude)); + assert!(eq_properties_finer.ordering_satisfy(crude.as_ref())); // Crude ordering doesn't satisfy finer ordering. 
should return false let mut eq_properties_crude = EquivalenceProperties::new(Arc::clone(&input_schema)); eq_properties_crude.oeq_class.push(crude); - assert!(!eq_properties_crude.ordering_satisfy(&finer)); + assert!(!eq_properties_crude.ordering_satisfy(finer.as_ref())); Ok(()) } @@ -589,7 +595,7 @@ mod tests { let reqs = convert_to_sort_exprs(&reqs); assert_eq!( - eq_properties.ordering_satisfy(&reqs), + eq_properties.ordering_satisfy(reqs.as_ref()), expected, "{}", err_msg @@ -649,7 +655,7 @@ mod tests { format!("error in test reqs: {:?}, expected: {:?}", reqs, expected,); let reqs = convert_to_sort_exprs(&reqs); assert_eq!( - eq_properties.ordering_satisfy(&reqs), + eq_properties.ordering_satisfy(reqs.as_ref()), expected, "{}", err_msg diff --git a/datafusion/physical-expr/src/equivalence/properties.rs b/datafusion/physical-expr/src/equivalence/properties.rs index 9a16b205ae25..55c99e93d040 100644 --- a/datafusion/physical-expr/src/equivalence/properties.rs +++ b/datafusion/physical-expr/src/equivalence/properties.rs @@ -103,7 +103,7 @@ use itertools::Itertools; /// # use arrow_schema::{Schema, Field, DataType, SchemaRef}; /// # use datafusion_physical_expr::{ConstExpr, EquivalenceProperties}; /// # use datafusion_physical_expr::expressions::col; -/// use datafusion_physical_expr_common::sort_expr::PhysicalSortExpr; +/// use datafusion_physical_expr_common::sort_expr::{LexOrdering, PhysicalSortExpr}; /// # let schema: SchemaRef = Arc::new(Schema::new(vec![ /// # Field::new("a", DataType::Int32, false), /// # Field::new("b", DataType::Int32, false), @@ -116,12 +116,12 @@ use itertools::Itertools; /// // with a single constant value of b /// let mut eq_properties = EquivalenceProperties::new(schema) /// .with_constants(vec![ConstExpr::from(col_b)]); -/// eq_properties.add_new_ordering(vec![ +/// eq_properties.add_new_ordering(LexOrdering::new(vec![ /// PhysicalSortExpr::new_default(col_a).asc(), /// PhysicalSortExpr::new_default(col_c).desc(), -/// ]); +/// ])); /// -/// assert_eq!(eq_properties.to_string(), "order: [[a@0 ASC,c@2 DESC]], const: [b@1]") +/// assert_eq!(eq_properties.to_string(), "order: [[a@0 ASC, c@2 DESC]], const: [b@1]") /// ``` #[derive(Debug, Clone)] pub struct EquivalenceProperties { @@ -185,6 +185,7 @@ impl EquivalenceProperties { let mut output_ordering = self.oeq_class().output_ordering().unwrap_or_default(); // Prune out constant expressions output_ordering + .inner .retain(|sort_expr| !const_exprs_contains(constants, &sort_expr.expr)); (!output_ordering.is_empty()).then_some(output_ordering) } @@ -196,7 +197,7 @@ impl EquivalenceProperties { OrderingEquivalenceClass::new( self.oeq_class .iter() - .map(|ordering| self.normalize_sort_exprs(ordering)) + .map(|ordering| self.normalize_sort_exprs(ordering.as_ref())) .collect(), ) } @@ -351,7 +352,7 @@ impl EquivalenceProperties { .iter() .filter(|ordering| ordering[0].expr.eq(&normalized_expr)) // First expression after leading ordering - .filter_map(|ordering| Some(ordering).zip(ordering.get(1))) + .filter_map(|ordering| Some(ordering).zip(ordering.inner.get(1))) { let leading_ordering = ordering[0].options; // Currently, we only handle expressions with a single child. @@ -378,7 +379,7 @@ impl EquivalenceProperties { // then we can deduce that ordering `[b ASC]` is also valid. // Hence, ordering `[b ASC]` can be added to the state as valid ordering. // (e.g. 
existing ordering where leading ordering is removed) - new_orderings.push(ordering[1..].to_vec()); + new_orderings.push(LexOrdering::new(ordering[1..].to_vec())); break; } } @@ -391,7 +392,7 @@ impl EquivalenceProperties { /// Updates the ordering equivalence group within assuming that the table /// is re-sorted according to the argument `sort_exprs`. Note that constants /// and equivalence classes are unchanged as they are unaffected by a re-sort. - pub fn with_reorder(mut self, sort_exprs: Vec) -> Self { + pub fn with_reorder(mut self, sort_exprs: LexOrdering) -> Self { // TODO: In some cases, existing ordering equivalences may still be valid add this analysis. self.oeq_class = OrderingEquivalenceClass::new(vec![sort_exprs]); self @@ -605,8 +606,8 @@ impl EquivalenceProperties { pub fn substitute_ordering_component( &self, mapping: &ProjectionMapping, - sort_expr: &[PhysicalSortExpr], - ) -> Result>> { + sort_expr: LexOrderingRef, + ) -> Result> { let new_orderings = sort_expr .iter() .map(|sort_expr| { @@ -616,7 +617,7 @@ impl EquivalenceProperties { .filter(|source| expr_refers(source, &sort_expr.expr)) .cloned() .collect(); - let mut res = vec![sort_expr.clone()]; + let mut res = LexOrdering::new(vec![sort_expr.clone()]); // TODO: Add one-to-ones analysis for ScalarFunctions. for r_expr in referring_exprs { // we check whether this expression is substitutable or not @@ -639,7 +640,9 @@ impl EquivalenceProperties { // Generate all valid orderings, given substituted expressions. let res = new_orderings .into_iter() + .map(|ordering| ordering.inner) .multi_cartesian_product() + .map(LexOrdering::new) .collect::>(); Ok(res) } @@ -653,7 +656,7 @@ impl EquivalenceProperties { let orderings = &self.oeq_class.orderings; let new_order = orderings .iter() - .map(|order| self.substitute_ordering_component(mapping, order)) + .map(|order| self.substitute_ordering_component(mapping, order.as_ref())) .collect::>>()?; let new_order = new_order.into_iter().flatten().collect(); self.oeq_class = OrderingEquivalenceClass::new(new_order); @@ -836,7 +839,7 @@ impl EquivalenceProperties { if prefixes.is_empty() { // If prefix is empty, there is no dependency. Insert // empty ordering: - prefixes = vec![vec![]]; + prefixes = vec![LexOrdering::default()]; } // Append current ordering on top its dependencies: for ordering in prefixes.iter_mut() { @@ -986,7 +989,8 @@ impl EquivalenceProperties { // Add new ordered section to the state. result.extend(ordered_exprs); } - result.into_iter().unzip() + let (left, right) = result.into_iter().unzip(); + (LexOrdering::new(left), right) } /// This function determines whether the provided expression is constant @@ -1076,6 +1080,7 @@ impl EquivalenceProperties { let mut new_orderings = vec![]; for ordering in self.oeq_class.orderings { let new_ordering = ordering + .inner .into_iter() .map(|mut sort_expr| { sort_expr.expr = with_new_schema(sort_expr.expr, &schema)?; @@ -1313,7 +1318,7 @@ fn construct_prefix_orderings( /// Generates all possible orderings where dependencies are satisfied for the /// current projection expression. /// -/// # Examaple +/// # Example /// If `dependences` is `a + b ASC` and the dependency map holds dependencies /// * `a ASC` --> `[c ASC]` /// * `b ASC` --> `[d DESC]`, @@ -1348,7 +1353,7 @@ fn generate_dependency_orderings( // No dependency, dependent is a leading ordering. 
if relevant_prefixes.is_empty() { // Return an empty ordering: - return vec![vec![]]; + return vec![LexOrdering::default()]; } relevant_prefixes @@ -1358,7 +1363,12 @@ fn generate_dependency_orderings( prefix_orderings .iter() .permutations(prefix_orderings.len()) - .map(|prefixes| prefixes.into_iter().flatten().cloned().collect()) + .map(|prefixes| { + prefixes + .into_iter() + .flat_map(|ordering| ordering.inner.clone()) + .collect() + }) .collect::>() }) .collect() @@ -1651,7 +1661,7 @@ impl<'a> DependencyEnumerator<'a> { // An empty dependency means the referred_sort_expr represents a global ordering. // Return its projected version, which is the target_expression. if node.dependencies.is_empty() { - return vec![vec![target_sort_expr.clone()]]; + return vec![LexOrdering::new(vec![target_sort_expr.clone()])]; }; node.dependencies @@ -1961,7 +1971,7 @@ impl UnionEquivalentOrderingBuilder { ) -> AddedOrdering { if ordering.is_empty() { AddedOrdering::Yes - } else if constants.is_empty() && properties.ordering_satisfy(&ordering) { + } else if constants.is_empty() && properties.ordering_satisfy(ordering.as_ref()) { // If the ordering satisfies the target properties, no need to // augment it with constants. self.orderings.push(ordering); @@ -2002,7 +2012,7 @@ impl UnionEquivalentOrderingBuilder { &properties.constants, ) { if !augmented_ordering.is_empty() { - assert!(properties.ordering_satisfy(&augmented_ordering)); + assert!(properties.ordering_satisfy(augmented_ordering.as_ref())); self.orderings.push(augmented_ordering); } } @@ -2022,9 +2032,9 @@ impl UnionEquivalentOrderingBuilder { existing_ordering: &LexOrdering, existing_constants: &[ConstExpr], ) -> Option { - let mut augmented_ordering = vec![]; - let mut sort_expr_iter = ordering.iter().peekable(); - let mut existing_sort_expr_iter = existing_ordering.iter().peekable(); + let mut augmented_ordering = LexOrdering::default(); + let mut sort_expr_iter = ordering.inner.iter().peekable(); + let mut existing_sort_expr_iter = existing_ordering.inner.iter().peekable(); // walk in parallel down the two orderings, trying to match them up while sort_expr_iter.peek().is_some() || existing_sort_expr_iter.peek().is_some() @@ -2170,20 +2180,20 @@ mod tests { let mut input_properties = EquivalenceProperties::new(Arc::clone(&input_schema)); // add equivalent ordering [a, b, c, d] - input_properties.add_new_ordering(vec![ + input_properties.add_new_ordering(LexOrdering::new(vec![ parse_sort_expr("a", &input_schema), parse_sort_expr("b", &input_schema), parse_sort_expr("c", &input_schema), parse_sort_expr("d", &input_schema), - ]); + ])); // add equivalent ordering [a, c, b, d] - input_properties.add_new_ordering(vec![ + input_properties.add_new_ordering(LexOrdering::new(vec![ parse_sort_expr("a", &input_schema), parse_sort_expr("c", &input_schema), parse_sort_expr("b", &input_schema), // NB b and c are swapped parse_sort_expr("d", &input_schema), - ]); + ])); // simply project all the columns in order let proj_exprs = vec![ @@ -2197,7 +2207,7 @@ mod tests { assert_eq!( out_properties.to_string(), - "order: [[a@0 ASC,c@2 ASC,b@1 ASC,d@3 ASC], [a@0 ASC,b@1 ASC,c@2 ASC,d@3 ASC]]" + "order: [[a@0 ASC, c@2 ASC, b@1 ASC, d@3 ASC], [a@0 ASC, b@1 ASC, c@2 ASC, d@3 ASC]]" ); Ok(()) @@ -2403,27 +2413,27 @@ mod tests { eq_properties.add_equal_conditions(&col_a_expr, &col_c_expr)?; let others = vec![ - vec![PhysicalSortExpr { + LexOrdering::new(vec![PhysicalSortExpr { expr: Arc::clone(&col_b_expr), options: sort_options, - }], - vec![PhysicalSortExpr { + }]), 
+ LexOrdering::new(vec![PhysicalSortExpr { expr: Arc::clone(&col_c_expr), options: sort_options, - }], + }]), ]; eq_properties.add_new_orderings(others); let mut expected_eqs = EquivalenceProperties::new(Arc::new(schema)); expected_eqs.add_new_orderings([ - vec![PhysicalSortExpr { + LexOrdering::new(vec![PhysicalSortExpr { expr: Arc::clone(&col_b_expr), options: sort_options, - }], - vec![PhysicalSortExpr { + }]), + LexOrdering::new(vec![PhysicalSortExpr { expr: Arc::clone(&col_c_expr), options: sort_options, - }], + }]), ]); let oeq_class = eq_properties.oeq_class().clone(); @@ -2446,7 +2456,7 @@ mod tests { let col_b = &col("b", &schema)?; let required_columns = [Arc::clone(col_b), Arc::clone(col_a)]; let mut eq_properties = EquivalenceProperties::new(Arc::new(schema)); - eq_properties.add_new_orderings([vec![ + eq_properties.add_new_orderings([LexOrdering::new(vec![ PhysicalSortExpr { expr: Arc::new(Column::new("b", 1)), options: sort_options_not, @@ -2455,12 +2465,12 @@ mod tests { expr: Arc::new(Column::new("a", 0)), options: sort_options, }, - ]]); + ])]); let (result, idxs) = eq_properties.find_longest_permutation(&required_columns); assert_eq!(idxs, vec![0, 1]); assert_eq!( result, - vec![ + LexOrdering::new(vec![ PhysicalSortExpr { expr: Arc::clone(col_b), options: sort_options_not @@ -2469,7 +2479,7 @@ mod tests { expr: Arc::clone(col_a), options: sort_options } - ] + ]) ); let schema = Schema::new(vec![ @@ -2482,11 +2492,11 @@ mod tests { let required_columns = [Arc::clone(col_b), Arc::clone(col_a)]; let mut eq_properties = EquivalenceProperties::new(Arc::new(schema)); eq_properties.add_new_orderings([ - vec![PhysicalSortExpr { + LexOrdering::new(vec![PhysicalSortExpr { expr: Arc::new(Column::new("c", 2)), options: sort_options, - }], - vec![ + }]), + LexOrdering::new(vec![ PhysicalSortExpr { expr: Arc::new(Column::new("b", 1)), options: sort_options_not, @@ -2495,13 +2505,13 @@ mod tests { expr: Arc::new(Column::new("a", 0)), options: sort_options, }, - ], + ]), ]); let (result, idxs) = eq_properties.find_longest_permutation(&required_columns); assert_eq!(idxs, vec![0, 1]); assert_eq!( result, - vec![ + LexOrdering::new(vec![ PhysicalSortExpr { expr: Arc::clone(col_b), options: sort_options_not @@ -2510,7 +2520,7 @@ mod tests { expr: Arc::clone(col_a), options: sort_options } - ] + ]) ); let required_columns = [ @@ -2525,7 +2535,7 @@ mod tests { let mut eq_properties = EquivalenceProperties::new(Arc::new(schema)); // not satisfied orders - eq_properties.add_new_orderings([vec![ + eq_properties.add_new_orderings([LexOrdering::new(vec![ PhysicalSortExpr { expr: Arc::new(Column::new("b", 1)), options: sort_options_not, @@ -2538,7 +2548,7 @@ mod tests { expr: Arc::new(Column::new("a", 0)), options: sort_options, }, - ]]); + ])]); let (_, idxs) = eq_properties.find_longest_permutation(&required_columns); assert_eq!(idxs, vec![0]); @@ -2567,14 +2577,14 @@ mod tests { eq_properties.add_equal_conditions(col_b, col_a)?; // [b ASC], [d ASC] eq_properties.add_new_orderings(vec![ - vec![PhysicalSortExpr { + LexOrdering::new(vec![PhysicalSortExpr { expr: Arc::clone(col_b), options: option_asc, - }], - vec![PhysicalSortExpr { + }]), + LexOrdering::new(vec![PhysicalSortExpr { expr: Arc::clone(col_d), options: option_asc, - }], + }]), ]); let test_cases = vec![ @@ -2605,7 +2615,7 @@ mod tests { let leading_orderings = eq_properties .oeq_class() .iter() - .flat_map(|ordering| ordering.first().cloned()) + .flat_map(|ordering| ordering.inner.first().cloned()) .collect::>(); let expr_props = 
eq_properties.get_expr_properties(Arc::clone(&expr)); let err_msg = format!( @@ -2649,7 +2659,7 @@ mod tests { nulls_first: true, }; // [d ASC, h DESC] also satisfies schema. - eq_properties.add_new_orderings([vec![ + eq_properties.add_new_orderings([LexOrdering::new(vec![ PhysicalSortExpr { expr: Arc::clone(col_d), options: option_asc, @@ -2658,7 +2668,7 @@ mod tests { expr: Arc::clone(col_h), options: option_desc, }, - ]]); + ])]); let test_cases = vec![ // TEST CASE 1 (vec![col_a], vec![(col_a, option_asc)]), @@ -2940,7 +2950,7 @@ mod tests { Field::new("c", DataType::Timestamp(TimeUnit::Nanosecond, None), true), ])); let base_properties = EquivalenceProperties::new(Arc::clone(&schema)) - .with_reorder( + .with_reorder(LexOrdering::new( ["a", "b", "c"] .into_iter() .map(|c| { @@ -2953,7 +2963,7 @@ mod tests { }) }) .collect::>>()?, - ); + )); struct TestCase { name: &'static str, @@ -3042,10 +3052,10 @@ mod tests { options: SortOptions::default(), }) }) - .collect::>>()?; + .collect::>()?; assert_eq!( - properties.ordering_satisfy(&sort), + properties.ordering_satisfy(sort.as_ref()), case.should_satisfy_ordering, "failed test '{}'", case.name @@ -3564,7 +3574,7 @@ mod tests { ordering .iter() .map(|name| parse_sort_expr(name, schema)) - .collect::>() + .collect::() }) .collect::>(); diff --git a/datafusion/physical-expr/src/utils/mod.rs b/datafusion/physical-expr/src/utils/mod.rs index 4bd022975ac3..c3d1b1425b7f 100644 --- a/datafusion/physical-expr/src/utils/mod.rs +++ b/datafusion/physical-expr/src/utils/mod.rs @@ -35,6 +35,7 @@ use datafusion_common::tree_node::{ use datafusion_common::Result; use datafusion_expr::Operator; +use datafusion_physical_expr_common::sort_expr::{LexOrdering, LexOrderingRef}; use itertools::Itertools; use petgraph::graph::NodeIndex; use petgraph::stable_graph::StableGraph; @@ -245,10 +246,7 @@ pub fn reassign_predicate_columns( } /// Merge left and right sort expressions, checking for duplicates. -pub fn merge_vectors( - left: &[PhysicalSortExpr], - right: &[PhysicalSortExpr], -) -> Vec { +pub fn merge_vectors(left: LexOrderingRef, right: LexOrderingRef) -> LexOrdering { left.iter() .cloned() .chain(right.iter().cloned()) diff --git a/datafusion/physical-expr/src/window/aggregate.rs b/datafusion/physical-expr/src/window/aggregate.rs index 3fe5d842dfd1..94960c95e4bb 100644 --- a/datafusion/physical-expr/src/window/aggregate.rs +++ b/datafusion/physical-expr/src/window/aggregate.rs @@ -25,16 +25,16 @@ use arrow::array::Array; use arrow::record_batch::RecordBatch; use arrow::{array::ArrayRef, datatypes::Field}; -use datafusion_common::ScalarValue; -use datafusion_common::{DataFusionError, Result}; -use datafusion_expr::{Accumulator, WindowFrame}; - use crate::aggregate::AggregateFunctionExpr; use crate::window::window_expr::AggregateWindowExpr; use crate::window::{ PartitionBatches, PartitionWindowAggStates, SlidingAggregateWindowExpr, WindowExpr, }; -use crate::{expressions::PhysicalSortExpr, reverse_order_bys, PhysicalExpr}; +use crate::{reverse_order_bys, PhysicalExpr}; +use datafusion_common::ScalarValue; +use datafusion_common::{DataFusionError, Result}; +use datafusion_expr::{Accumulator, WindowFrame}; +use datafusion_physical_expr_common::sort_expr::{LexOrdering, LexOrderingRef}; /// A window expr that takes the form of an aggregate function. 
/// @@ -43,7 +43,7 @@ use crate::{expressions::PhysicalSortExpr, reverse_order_bys, PhysicalExpr}; pub struct PlainAggregateWindowExpr { aggregate: Arc, partition_by: Vec>, - order_by: Vec, + order_by: LexOrdering, window_frame: Arc, } @@ -52,13 +52,13 @@ impl PlainAggregateWindowExpr { pub fn new( aggregate: Arc, partition_by: &[Arc], - order_by: &[PhysicalSortExpr], + order_by: LexOrderingRef, window_frame: Arc, ) -> Self { Self { aggregate, partition_by: partition_by.to_vec(), - order_by: order_by.to_vec(), + order_by: LexOrdering::from_ref(order_by), window_frame, } } @@ -124,8 +124,8 @@ impl WindowExpr for PlainAggregateWindowExpr { &self.partition_by } - fn order_by(&self) -> &[PhysicalSortExpr] { - &self.order_by + fn order_by(&self) -> LexOrderingRef { + self.order_by.as_ref() } fn get_window_frame(&self) -> &Arc { @@ -139,14 +139,14 @@ impl WindowExpr for PlainAggregateWindowExpr { Arc::new(PlainAggregateWindowExpr::new( Arc::new(reverse_expr), &self.partition_by.clone(), - &reverse_order_bys(&self.order_by), + reverse_order_bys(self.order_by.as_ref()).as_ref(), Arc::new(self.window_frame.reverse()), )) as _ } else { Arc::new(SlidingAggregateWindowExpr::new( Arc::new(reverse_expr), &self.partition_by.clone(), - &reverse_order_bys(&self.order_by), + reverse_order_bys(self.order_by.as_ref()).as_ref(), Arc::new(self.window_frame.reverse()), )) as _ } diff --git a/datafusion/physical-expr/src/window/built_in.rs b/datafusion/physical-expr/src/window/built_in.rs index 8ff277db37df..5f6c5e5c2c1b 100644 --- a/datafusion/physical-expr/src/window/built_in.rs +++ b/datafusion/physical-expr/src/window/built_in.rs @@ -22,7 +22,6 @@ use std::ops::Range; use std::sync::Arc; use super::{BuiltInWindowFunctionExpr, WindowExpr}; -use crate::expressions::PhysicalSortExpr; use crate::window::window_expr::{get_orderby_values, WindowFn}; use crate::window::{PartitionBatches, PartitionWindowAggStates, WindowState}; use crate::{reverse_order_bys, EquivalenceProperties, PhysicalExpr}; @@ -34,13 +33,14 @@ use datafusion_common::utils::evaluate_partition_ranges; use datafusion_common::{Result, ScalarValue}; use datafusion_expr::window_state::{WindowAggState, WindowFrameContext}; use datafusion_expr::WindowFrame; +use datafusion_physical_expr_common::sort_expr::{LexOrdering, LexOrderingRef}; /// A window expr that takes the form of a [`BuiltInWindowFunctionExpr`]. 
#[derive(Debug)] pub struct BuiltInWindowExpr { expr: Arc, partition_by: Vec>, - order_by: Vec, + order_by: LexOrdering, window_frame: Arc, } @@ -49,13 +49,13 @@ impl BuiltInWindowExpr { pub fn new( expr: Arc, partition_by: &[Arc], - order_by: &[PhysicalSortExpr], + order_by: LexOrderingRef, window_frame: Arc, ) -> Self { Self { expr, partition_by: partition_by.to_vec(), - order_by: order_by.to_vec(), + order_by: LexOrdering::from_ref(order_by), window_frame, } } @@ -76,7 +76,8 @@ impl BuiltInWindowExpr { if let Some(fn_res_ordering) = self.expr.get_result_ordering(schema) { if self.partition_by.is_empty() { // In the absence of a PARTITION BY, ordering of `self.expr` is global: - eq_properties.add_new_orderings([vec![fn_res_ordering]]); + eq_properties + .add_new_orderings([LexOrdering::new(vec![fn_res_ordering])]); } else { // If we have a PARTITION BY, built-in functions can not introduce // a global ordering unless the existing ordering is compatible @@ -117,8 +118,8 @@ impl WindowExpr for BuiltInWindowExpr { &self.partition_by } - fn order_by(&self) -> &[PhysicalSortExpr] { - &self.order_by + fn order_by(&self) -> LexOrderingRef { + self.order_by.as_ref() } fn evaluate(&self, batch: &RecordBatch) -> Result { @@ -266,7 +267,7 @@ impl WindowExpr for BuiltInWindowExpr { Arc::new(BuiltInWindowExpr::new( reverse_expr, &self.partition_by.clone(), - &reverse_order_bys(&self.order_by), + reverse_order_bys(self.order_by.as_ref()).as_ref(), Arc::new(self.window_frame.reverse()), )) as _ }) diff --git a/datafusion/physical-expr/src/window/sliding_aggregate.rs b/datafusion/physical-expr/src/window/sliding_aggregate.rs index b889ec8c5d98..1e46baae7b0a 100644 --- a/datafusion/physical-expr/src/window/sliding_aggregate.rs +++ b/datafusion/physical-expr/src/window/sliding_aggregate.rs @@ -25,15 +25,15 @@ use arrow::array::{Array, ArrayRef}; use arrow::datatypes::Field; use arrow::record_batch::RecordBatch; -use datafusion_common::{Result, ScalarValue}; -use datafusion_expr::{Accumulator, WindowFrame}; - use crate::aggregate::AggregateFunctionExpr; use crate::window::window_expr::AggregateWindowExpr; use crate::window::{ PartitionBatches, PartitionWindowAggStates, PlainAggregateWindowExpr, WindowExpr, }; use crate::{expressions::PhysicalSortExpr, reverse_order_bys, PhysicalExpr}; +use datafusion_common::{Result, ScalarValue}; +use datafusion_expr::{Accumulator, WindowFrame}; +use datafusion_physical_expr_common::sort_expr::{LexOrdering, LexOrderingRef}; /// A window expr that takes the form of an aggregate function that /// can be incrementally computed over sliding windows. 
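The window-expression hunks above and below all apply the same mechanical change: an owned Vec<PhysicalSortExpr> field becomes a LexOrdering, constructors accept a LexOrderingRef and copy it with LexOrdering::from_ref, and order_by() hands the ordering back as a borrowed slice via as_ref(). A minimal sketch of the resulting shape follows; it is not part of the diff, assumes the datafusion-physical-expr crates at this revision, and uses a made-up column name and helper function purely for illustration.

// Illustrative sketch only, not part of the patch.
use std::sync::Arc;
use arrow_schema::SortOptions;
use datafusion_physical_expr::expressions::Column;
use datafusion_physical_expr_common::sort_expr::{LexOrdering, PhysicalSortExpr};

fn example_order_by() -> LexOrdering {
    // Owned orderings are wrapped in the LexOrdering newtype...
    let order_by = LexOrdering::new(vec![PhysicalSortExpr {
        expr: Arc::new(Column::new("ts", 0)),
        options: SortOptions::default(),
    }]);
    // ...and borrowed as a LexOrderingRef (a slice of PhysicalSortExpr),
    // which is what the updated constructors and order_by() now use.
    let _borrowed = order_by.as_ref();
    order_by
}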
@@ -43,7 +43,7 @@ use crate::{expressions::PhysicalSortExpr, reverse_order_bys, PhysicalExpr}; pub struct SlidingAggregateWindowExpr { aggregate: Arc, partition_by: Vec>, - order_by: Vec, + order_by: LexOrdering, window_frame: Arc, } @@ -52,13 +52,13 @@ impl SlidingAggregateWindowExpr { pub fn new( aggregate: Arc, partition_by: &[Arc], - order_by: &[PhysicalSortExpr], + order_by: LexOrderingRef, window_frame: Arc, ) -> Self { Self { aggregate, partition_by: partition_by.to_vec(), - order_by: order_by.to_vec(), + order_by: LexOrdering::from_ref(order_by), window_frame, } } @@ -108,8 +108,8 @@ impl WindowExpr for SlidingAggregateWindowExpr { &self.partition_by } - fn order_by(&self) -> &[PhysicalSortExpr] { - &self.order_by + fn order_by(&self) -> LexOrderingRef { + self.order_by.as_ref() } fn get_window_frame(&self) -> &Arc { @@ -123,14 +123,14 @@ impl WindowExpr for SlidingAggregateWindowExpr { Arc::new(PlainAggregateWindowExpr::new( Arc::new(reverse_expr), &self.partition_by.clone(), - &reverse_order_bys(&self.order_by), + reverse_order_bys(self.order_by.as_ref()).as_ref(), Arc::new(self.window_frame.reverse()), )) as _ } else { Arc::new(SlidingAggregateWindowExpr::new( Arc::new(reverse_expr), &self.partition_by.clone(), - &reverse_order_bys(&self.order_by), + reverse_order_bys(self.order_by.as_ref()).as_ref(), Arc::new(self.window_frame.reverse()), )) as _ } @@ -157,7 +157,7 @@ impl WindowExpr for SlidingAggregateWindowExpr { expr: new_expr, options: req.options, }) - .collect::>(); + .collect::(); Some(Arc::new(SlidingAggregateWindowExpr { aggregate: self .aggregate diff --git a/datafusion/physical-expr/src/window/window_expr.rs b/datafusion/physical-expr/src/window/window_expr.rs index 46c46fab68c5..0f882def4433 100644 --- a/datafusion/physical-expr/src/window/window_expr.rs +++ b/datafusion/physical-expr/src/window/window_expr.rs @@ -20,7 +20,7 @@ use std::fmt::Debug; use std::ops::Range; use std::sync::Arc; -use crate::{LexOrderingRef, PhysicalExpr, PhysicalSortExpr}; +use crate::{LexOrderingRef, PhysicalExpr}; use arrow::array::{new_empty_array, Array, ArrayRef}; use arrow::compute::kernels::sort::SortColumn; @@ -109,7 +109,7 @@ pub trait WindowExpr: Send + Sync + Debug { fn partition_by(&self) -> &[Arc]; /// Expressions that's from the window function's order by clause, empty if absent - fn order_by(&self) -> &[PhysicalSortExpr]; + fn order_by(&self) -> LexOrderingRef; /// Get order by columns, empty if absent fn order_by_columns(&self, batch: &RecordBatch) -> Result> { diff --git a/datafusion/physical-optimizer/src/topk_aggregation.rs b/datafusion/physical-optimizer/src/topk_aggregation.rs index c8a28ed0ec0b..0e5fb82d9e93 100644 --- a/datafusion/physical-optimizer/src/topk_aggregation.rs +++ b/datafusion/physical-optimizer/src/topk_aggregation.rs @@ -25,6 +25,7 @@ use datafusion_common::config::ConfigOptions; use datafusion_common::tree_node::{Transformed, TransformedResult, TreeNode}; use datafusion_common::Result; use datafusion_physical_expr::expressions::Column; +use datafusion_physical_expr::LexOrdering; use datafusion_physical_plan::aggregates::AggregateExec; use datafusion_physical_plan::execution_plan::CardinalityEffect; use datafusion_physical_plan::projection::ProjectionExec; @@ -126,7 +127,7 @@ impl TopKAggregation { Ok(Transformed::no(plan)) }; let child = Arc::clone(child).transform_down(closure).data().ok()?; - let sort = SortExec::new(sort.expr().to_vec(), child) + let sort = SortExec::new(LexOrdering::new(sort.expr().to_vec()), child) .with_fetch(sort.fetch()) 
.with_preserve_partitioning(sort.preserve_partitioning()); Some(Arc::new(sort)) diff --git a/datafusion/physical-plan/benches/spm.rs b/datafusion/physical-plan/benches/spm.rs index 9cc703f5f726..fbbd27409173 100644 --- a/datafusion/physical-plan/benches/spm.rs +++ b/datafusion/physical-plan/benches/spm.rs @@ -22,6 +22,7 @@ use arrow_array::{ArrayRef, Int32Array, Int64Array, StringArray}; use datafusion_execution::TaskContext; use datafusion_physical_expr::expressions::col; use datafusion_physical_expr::PhysicalSortExpr; +use datafusion_physical_expr_common::sort_expr::LexOrdering; use datafusion_physical_plan::memory::MemoryExec; use datafusion_physical_plan::sorts::sort_preserving_merge::SortPreservingMergeExec; use datafusion_physical_plan::{collect, ExecutionPlan}; @@ -70,7 +71,7 @@ fn generate_spm_for_round_robin_tie_breaker( let partitiones = vec![rbs.clone(); partition_count]; let schema = rb.schema(); - let sort = vec![ + let sort = LexOrdering::new(vec![ PhysicalSortExpr { expr: col("b", &schema).unwrap(), options: Default::default(), @@ -79,7 +80,7 @@ fn generate_spm_for_round_robin_tie_breaker( expr: col("c", &schema).unwrap(), options: Default::default(), }, - ]; + ]); let exec = MemoryExec::try_new(&partitiones, schema, None).unwrap(); SortPreservingMergeExec::new(sort, Arc::new(exec)) diff --git a/datafusion/physical-plan/src/aggregates/mod.rs b/datafusion/physical-plan/src/aggregates/mod.rs index 48a03af19dbd..4193cc187e10 100644 --- a/datafusion/physical-plan/src/aggregates/mod.rs +++ b/datafusion/physical-plan/src/aggregates/mod.rs @@ -937,10 +937,10 @@ fn get_aggregate_expr_req( // necessary, or the aggregation is performing a "second stage" calculation, // then ignore the ordering requirement. if !aggr_expr.order_sensitivity().hard_requires() || !agg_mode.is_first_stage() { - return vec![]; + return LexOrdering::default(); } - let mut req = aggr_expr.order_bys().unwrap_or_default().to_vec(); + let mut req = LexOrdering::from_ref(aggr_expr.order_bys().unwrap_or_default()); // In non-first stage modes, we accumulate data (using `merge_batch`) from // different partitions (i.e. merge partial results). During this merge, we @@ -983,7 +983,7 @@ fn finer_ordering( agg_mode: &AggregateMode, ) -> Option { let aggr_req = get_aggregate_expr_req(aggr_expr, group_by, agg_mode); - eq_properties.get_finer_ordering(existing_req, &aggr_req) + eq_properties.get_finer_ordering(existing_req.as_ref(), aggr_req.as_ref()) } /// Concatenates the given slices. @@ -1014,12 +1014,12 @@ pub fn get_finer_aggregate_exprs_requirement( eq_properties: &EquivalenceProperties, agg_mode: &AggregateMode, ) -> Result { - let mut requirement = vec![]; + let mut requirement = LexOrdering::default(); for aggr_expr in aggr_exprs.iter_mut() { if let Some(finer_ordering) = finer_ordering(&requirement, aggr_expr, group_by, eq_properties, agg_mode) { - if eq_properties.ordering_satisfy(&finer_ordering) { + if eq_properties.ordering_satisfy(finer_ordering.as_ref()) { // Requirement is satisfied by existing ordering requirement = finer_ordering; continue; @@ -1033,7 +1033,7 @@ pub fn get_finer_aggregate_exprs_requirement( eq_properties, agg_mode, ) { - if eq_properties.ordering_satisfy(&finer_ordering) { + if eq_properties.ordering_satisfy(finer_ordering.as_ref()) { // Reverse requirement is satisfied by exiting ordering. 
// Hence reverse the aggregator requirement = finer_ordering; @@ -1074,7 +1074,9 @@ pub fn get_finer_aggregate_exprs_requirement( ); } - Ok(PhysicalSortRequirement::from_sort_exprs(&requirement)) + Ok(PhysicalSortRequirement::from_sort_exprs( + requirement.inner.iter(), + )) } /// Returns physical expressions for arguments to evaluate against a batch. @@ -2088,7 +2090,7 @@ mod tests { let args = [col("b", schema)?]; AggregateExprBuilder::new(first_value_udaf(), args.to_vec()) - .order_by(ordering_req.to_vec()) + .order_by(LexOrdering::new(ordering_req.to_vec())) .schema(Arc::new(schema.clone())) .alias(String::from("first_value(b) ORDER BY [b ASC NULLS LAST]")) .build() @@ -2106,7 +2108,7 @@ mod tests { }]; let args = [col("b", schema)?]; AggregateExprBuilder::new(last_value_udaf(), args.to_vec()) - .order_by(ordering_req.to_vec()) + .order_by(LexOrdering::new(ordering_req.to_vec())) .schema(Arc::new(schema.clone())) .alias(String::from("last_value(b) ORDER BY [b ASC NULLS LAST]")) .build() @@ -2272,7 +2274,7 @@ mod tests { ]), ]; - let common_requirement = vec![ + let common_requirement = LexOrdering::new(vec![ PhysicalSortExpr { expr: Arc::clone(col_a), options: options1, @@ -2281,14 +2283,14 @@ mod tests { expr: Arc::clone(col_c), options: options1, }, - ]; + ]); let mut aggr_exprs = order_by_exprs .into_iter() .map(|order_by_expr| { let ordering_req = order_by_expr.unwrap_or_default(); AggregateExprBuilder::new(array_agg_udaf(), vec![Arc::clone(col_a)]) .alias("a") - .order_by(ordering_req.to_vec()) + .order_by(LexOrdering::new(ordering_req.to_vec())) .schema(Arc::clone(&test_schema)) .build() .map(Arc::new) diff --git a/datafusion/physical-plan/src/aggregates/order/mod.rs b/datafusion/physical-plan/src/aggregates/order/mod.rs index accb2fda1131..24846d239591 100644 --- a/datafusion/physical-plan/src/aggregates/order/mod.rs +++ b/datafusion/physical-plan/src/aggregates/order/mod.rs @@ -19,7 +19,7 @@ use arrow_array::ArrayRef; use arrow_schema::Schema; use datafusion_common::Result; use datafusion_expr::EmitTo; -use datafusion_physical_expr::PhysicalSortExpr; +use datafusion_physical_expr_common::sort_expr::LexOrderingRef; use std::mem::size_of; mod full; @@ -45,7 +45,7 @@ impl GroupOrdering { pub fn try_new( input_schema: &Schema, mode: &InputOrderMode, - ordering: &[PhysicalSortExpr], + ordering: LexOrderingRef, ) -> Result { match mode { InputOrderMode::Linear => Ok(GroupOrdering::None), diff --git a/datafusion/physical-plan/src/aggregates/order/partial.rs b/datafusion/physical-plan/src/aggregates/order/partial.rs index 2dd1ea8a5449..5cc55dc0d028 100644 --- a/datafusion/physical-plan/src/aggregates/order/partial.rs +++ b/datafusion/physical-plan/src/aggregates/order/partial.rs @@ -21,7 +21,7 @@ use arrow_schema::Schema; use datafusion_common::Result; use datafusion_execution::memory_pool::proxy::VecAllocExt; use datafusion_expr::EmitTo; -use datafusion_physical_expr::PhysicalSortExpr; +use datafusion_physical_expr_common::sort_expr::LexOrderingRef; use std::mem::size_of; use std::sync::Arc; @@ -107,7 +107,7 @@ impl GroupOrderingPartial { pub fn try_new( input_schema: &Schema, order_indices: &[usize], - ordering: &[PhysicalSortExpr], + ordering: LexOrderingRef, ) -> Result { assert!(!order_indices.is_empty()); assert!(order_indices.len() <= ordering.len()); diff --git a/datafusion/physical-plan/src/aggregates/row_hash.rs b/datafusion/physical-plan/src/aggregates/row_hash.rs index 7d21cc2f1944..fe05f7375ed3 100644 --- a/datafusion/physical-plan/src/aggregates/row_hash.rs +++ 
b/datafusion/physical-plan/src/aggregates/row_hash.rs @@ -48,14 +48,14 @@ use datafusion_expr::{EmitTo, GroupsAccumulator}; use datafusion_physical_expr::expressions::Column; use datafusion_physical_expr::{GroupsAccumulatorAdapter, PhysicalSortExpr}; +use super::order::GroupOrdering; +use super::AggregateExec; use datafusion_physical_expr::aggregate::AggregateFunctionExpr; +use datafusion_physical_expr_common::sort_expr::LexOrdering; use futures::ready; use futures::stream::{Stream, StreamExt}; use log::debug; -use super::order::GroupOrdering; -use super::AggregateExec; - #[derive(Debug, Clone)] /// This object tracks the aggregation phase (input/output) pub(crate) enum ExecutionState { @@ -80,7 +80,7 @@ struct SpillState { // the execution. // ======================================================================== /// Sorting expression for spilling batches - spill_expr: Vec, + spill_expr: LexOrdering, /// Schema for spilling batches spill_schema: SchemaRef, @@ -511,7 +511,7 @@ impl GroupedHashAggregateStream { let group_ordering = GroupOrdering::try_new( &group_schema, &agg.input_order_mode, - ordering.as_slice(), + ordering.as_ref(), )?; let group_values = new_group_values(group_schema)?; @@ -965,7 +965,7 @@ impl GroupedHashAggregateStream { /// Emit all rows, sort them, and store them on disk. fn spill(&mut self) -> Result<()> { let emit = self.emit(EmitTo::All, true)?; - let sorted = sort_batch(&emit, &self.spill_state.spill_expr, None)?; + let sorted = sort_batch(&emit, self.spill_state.spill_expr.as_ref(), None)?; let spillfile = self.runtime.disk_manager.create_tmp_file("HashAggSpill")?; // TODO: slice large `sorted` and write to multiple files in parallel spill_record_batch_by_size( @@ -1030,7 +1030,7 @@ impl GroupedHashAggregateStream { streams.push(Box::pin(RecordBatchStreamAdapter::new( Arc::clone(&schema), futures::stream::once(futures::future::lazy(move |_| { - sort_batch(&batch, &expr, None) + sort_batch(&batch, expr.as_ref(), None) })), ))); for spill in self.spill_state.spills.drain(..) { @@ -1041,7 +1041,7 @@ impl GroupedHashAggregateStream { self.input = StreamingMergeBuilder::new() .with_streams(streams) .with_schema(schema) - .with_expressions(&self.spill_state.spill_expr) + .with_expressions(self.spill_state.spill_expr.as_ref()) .with_metrics(self.baseline_metrics.clone()) .with_batch_size(self.batch_size) .with_reservation(self.reservation.new_empty()) diff --git a/datafusion/physical-plan/src/display.rs b/datafusion/physical-plan/src/display.rs index e79b3c817bd1..9f3a76e28577 100644 --- a/datafusion/physical-plan/src/display.rs +++ b/datafusion/physical-plan/src/display.rs @@ -25,7 +25,7 @@ use arrow_schema::SchemaRef; use datafusion_common::display::{GraphvizBuilder, PlanType, StringifiedPlan}; use datafusion_expr::display_schema; -use datafusion_physical_expr::{LexOrdering, PhysicalSortExpr}; +use datafusion_physical_expr::LexOrdering; use super::{accept, ExecutionPlan, ExecutionPlanVisitor}; @@ -459,23 +459,6 @@ impl<'a> fmt::Display for ProjectSchemaDisplay<'a> { } } -/// A wrapper to customize output ordering display. -#[derive(Debug)] -pub struct OutputOrderingDisplay<'a>(pub &'a [PhysicalSortExpr]); - -impl<'a> fmt::Display for OutputOrderingDisplay<'a> { - fn fmt(&self, f: &mut Formatter) -> fmt::Result { - write!(f, "[")?; - for (i, e) in self.0.iter().enumerate() { - if i > 0 { - write!(f, ", ")? 
- } - write!(f, "{e}")?; - } - write!(f, "]") - } -} - pub fn display_orderings(f: &mut Formatter, orderings: &[LexOrdering]) -> fmt::Result { if let Some(ordering) = orderings.first() { if !ordering.is_empty() { @@ -489,8 +472,8 @@ pub fn display_orderings(f: &mut Formatter, orderings: &[LexOrdering]) -> fmt::R orderings.iter().enumerate().filter(|(_, o)| !o.is_empty()) { match idx { - 0 => write!(f, "{}", OutputOrderingDisplay(ordering))?, - _ => write!(f, ", {}", OutputOrderingDisplay(ordering))?, + 0 => write!(f, "[{}]", ordering)?, + _ => write!(f, ", [{}]", ordering)?, } } let end = if orderings.len() == 1 { "" } else { "]" }; diff --git a/datafusion/physical-plan/src/execution_plan.rs b/datafusion/physical-plan/src/execution_plan.rs index e6484452d43e..d65320dbab68 100644 --- a/datafusion/physical-plan/src/execution_plan.rs +++ b/datafusion/physical-plan/src/execution_plan.rs @@ -37,8 +37,8 @@ pub use datafusion_physical_expr::window::WindowExpr; pub use datafusion_physical_expr::{ expressions, udf, Distribution, Partitioning, PhysicalExpr, }; -use datafusion_physical_expr::{EquivalenceProperties, LexOrdering, PhysicalSortExpr}; -use datafusion_physical_expr_common::sort_expr::LexRequirement; +use datafusion_physical_expr::{EquivalenceProperties, LexOrdering}; +use datafusion_physical_expr_common::sort_expr::{LexOrderingRef, LexRequirement}; use crate::coalesce_partitions::CoalescePartitionsExec; use crate::display::DisplayableExecutionPlan; @@ -443,7 +443,7 @@ pub trait ExecutionPlanProperties { /// For example, `SortExec` (obviously) produces sorted output as does /// `SortPreservingMergeStream`. Less obviously, `Projection` produces sorted /// output if its input is sorted as it does not reorder the input rows. - fn output_ordering(&self) -> Option<&[PhysicalSortExpr]>; + fn output_ordering(&self) -> Option; /// Get the [`EquivalenceProperties`] within the plan. 
/// @@ -474,7 +474,7 @@ impl ExecutionPlanProperties for Arc { self.properties().execution_mode() } - fn output_ordering(&self) -> Option<&[PhysicalSortExpr]> { + fn output_ordering(&self) -> Option { self.properties().output_ordering() } @@ -492,7 +492,7 @@ impl ExecutionPlanProperties for &dyn ExecutionPlan { self.properties().execution_mode() } - fn output_ordering(&self) -> Option<&[PhysicalSortExpr]> { + fn output_ordering(&self) -> Option { self.properties().output_ordering() } @@ -643,7 +643,7 @@ impl PlanProperties { &self.partitioning } - pub fn output_ordering(&self) -> Option<&[PhysicalSortExpr]> { + pub fn output_ordering(&self) -> Option { self.output_ordering.as_deref() } diff --git a/datafusion/physical-plan/src/joins/nested_loop_join.rs b/datafusion/physical-plan/src/joins/nested_loop_join.rs index 957230f51372..a87743565adf 100644 --- a/datafusion/physical-plan/src/joins/nested_loop_join.rs +++ b/datafusion/physical-plan/src/joins/nested_loop_join.rs @@ -858,7 +858,7 @@ pub(crate) mod tests { use datafusion_expr::Operator; use datafusion_physical_expr::expressions::{BinaryExpr, Literal}; use datafusion_physical_expr::{Partitioning, PhysicalExpr}; - use datafusion_physical_expr_common::sort_expr::PhysicalSortExpr; + use datafusion_physical_expr_common::sort_expr::{LexOrdering, PhysicalSortExpr}; use rstest::rstest; @@ -888,7 +888,7 @@ pub(crate) mod tests { let mut exec = MemoryExec::try_new(&[batches], Arc::clone(&schema), None).unwrap(); if !sorted_column_names.is_empty() { - let mut sort_info = Vec::new(); + let mut sort_info = LexOrdering::default(); for name in sorted_column_names { let index = schema.index_of(name).unwrap(); let sort_expr = PhysicalSortExpr { diff --git a/datafusion/physical-plan/src/joins/sort_merge_join.rs b/datafusion/physical-plan/src/joins/sort_merge_join.rs index 20fafcc34773..90dc407fcaed 100644 --- a/datafusion/physical-plan/src/joins/sort_merge_join.rs +++ b/datafusion/physical-plan/src/joins/sort_merge_join.rs @@ -52,7 +52,7 @@ use datafusion_execution::runtime_env::RuntimeEnv; use datafusion_execution::TaskContext; use datafusion_physical_expr::equivalence::join_equivalence_properties; use datafusion_physical_expr::{PhysicalExprRef, PhysicalSortRequirement}; -use datafusion_physical_expr_common::sort_expr::LexRequirement; +use datafusion_physical_expr_common::sort_expr::{LexOrdering, LexRequirement}; use futures::{Stream, StreamExt}; use hashbrown::HashSet; @@ -88,9 +88,9 @@ pub struct SortMergeJoinExec { /// Execution metrics metrics: ExecutionPlanMetricsSet, /// The left SortExpr - left_sort_exprs: Vec, + left_sort_exprs: LexOrdering, /// The right SortExpr - right_sort_exprs: Vec, + right_sort_exprs: LexOrdering, /// Sort options of join columns used in sorting left and right execution plans pub sort_options: Vec, /// If null_equals_null is true, null == null else null != null @@ -159,8 +159,8 @@ impl SortMergeJoinExec { join_type, schema, metrics: ExecutionPlanMetricsSet::new(), - left_sort_exprs, - right_sort_exprs, + left_sort_exprs: LexOrdering::new(left_sort_exprs), + right_sort_exprs: LexOrdering::new(right_sort_exprs), sort_options, null_equals_null, cache, @@ -299,10 +299,10 @@ impl ExecutionPlan for SortMergeJoinExec { fn required_input_ordering(&self) -> Vec> { vec![ Some(PhysicalSortRequirement::from_sort_exprs( - &self.left_sort_exprs, + self.left_sort_exprs.iter(), )), Some(PhysicalSortRequirement::from_sort_exprs( - &self.right_sort_exprs, + self.right_sort_exprs.iter(), )), ] } diff --git 
a/datafusion/physical-plan/src/joins/stream_join_utils.rs b/datafusion/physical-plan/src/joins/stream_join_utils.rs index 02c71dab3df2..5ccdd9b40dee 100644 --- a/datafusion/physical-plan/src/joins/stream_join_utils.rs +++ b/datafusion/physical-plan/src/joins/stream_join_utils.rs @@ -40,6 +40,7 @@ use datafusion_physical_expr::intervals::cp_solver::ExprIntervalGraph; use datafusion_physical_expr::utils::collect_columns; use datafusion_physical_expr::{PhysicalExpr, PhysicalSortExpr}; +use datafusion_physical_expr_common::sort_expr::LexOrderingRef; use hashbrown::raw::RawTable; use hashbrown::HashSet; @@ -744,8 +745,8 @@ pub fn prepare_sorted_exprs( filter: &JoinFilter, left: &Arc, right: &Arc, - left_sort_exprs: &[PhysicalSortExpr], - right_sort_exprs: &[PhysicalSortExpr], + left_sort_exprs: LexOrderingRef, + right_sort_exprs: LexOrderingRef, ) -> Result<(SortedFilterExpr, SortedFilterExpr, ExprIntervalGraph)> { let err = || { datafusion_common::plan_datafusion_err!("Filter does not include the child order") diff --git a/datafusion/physical-plan/src/joins/symmetric_hash_join.rs b/datafusion/physical-plan/src/joins/symmetric_hash_join.rs index 3e0cd48da2bf..81c13c652513 100644 --- a/datafusion/physical-plan/src/joins/symmetric_hash_join.rs +++ b/datafusion/physical-plan/src/joins/symmetric_hash_join.rs @@ -48,7 +48,6 @@ use crate::joins::utils::{ }; use crate::{ execution_mode_from_children, - expressions::PhysicalSortExpr, joins::StreamJoinPartitionMode, metrics::{ExecutionPlanMetricsSet, MetricsSet}, DisplayAs, DisplayFormatType, Distribution, ExecutionPlan, ExecutionPlanProperties, @@ -74,7 +73,9 @@ use datafusion_physical_expr::intervals::cp_solver::ExprIntervalGraph; use datafusion_physical_expr::{PhysicalExprRef, PhysicalSortRequirement}; use ahash::RandomState; -use datafusion_physical_expr_common::sort_expr::LexRequirement; +use datafusion_physical_expr_common::sort_expr::{ + LexOrdering, LexOrderingRef, LexRequirement, +}; use futures::{ready, Stream, StreamExt}; use hashbrown::HashSet; use parking_lot::Mutex; @@ -187,9 +188,9 @@ pub struct SymmetricHashJoinExec { /// If null_equals_null is true, null == null else null != null pub(crate) null_equals_null: bool, /// Left side sort expression(s) - pub(crate) left_sort_exprs: Option>, + pub(crate) left_sort_exprs: Option, /// Right side sort expression(s) - pub(crate) right_sort_exprs: Option>, + pub(crate) right_sort_exprs: Option, /// Partition Mode mode: StreamJoinPartitionMode, /// Cache holding plan properties like equivalences, output partitioning etc. 
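In SymmetricHashJoinExec, the optional per-side sort expressions above become Option<LexOrdering>, and the constructor hunk below follows suit. Call sites that previously cloned a child's output ordering into a Vec now wrap it, as the joins/test_utils.rs hunk later in this patch does. A hedged sketch of that caller-side conversion (the helper name is invented; the calls are the ones this patch itself uses):

// Sketch of the caller-side conversion, not part of the diff.
use std::sync::Arc;
use datafusion_physical_expr_common::sort_expr::LexOrdering;
use datafusion_physical_plan::{ExecutionPlan, ExecutionPlanProperties};

fn side_sort_exprs(plan: &Arc<dyn ExecutionPlan>) -> Option<LexOrdering> {
    // output_ordering() now yields a borrowed LexOrderingRef; take an owned copy.
    plan.output_ordering().map(|o| LexOrdering::new(o.to_vec()))
}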
@@ -211,8 +212,8 @@ impl SymmetricHashJoinExec { filter: Option, join_type: &JoinType, null_equals_null: bool, - left_sort_exprs: Option>, - right_sort_exprs: Option>, + left_sort_exprs: Option, + right_sort_exprs: Option, mode: StreamJoinPartitionMode, ) -> Result { let left_schema = left.schema(); @@ -319,12 +320,12 @@ impl SymmetricHashJoinExec { } /// Get left_sort_exprs - pub fn left_sort_exprs(&self) -> Option<&[PhysicalSortExpr]> { + pub fn left_sort_exprs(&self) -> Option { self.left_sort_exprs.as_deref() } /// Get right_sort_exprs - pub fn right_sort_exprs(&self) -> Option<&[PhysicalSortExpr]> { + pub fn right_sort_exprs(&self) -> Option { self.right_sort_exprs.as_deref() } @@ -417,9 +418,11 @@ impl ExecutionPlan for SymmetricHashJoinExec { vec![ self.left_sort_exprs .as_ref() + .map(LexOrdering::iter) .map(PhysicalSortRequirement::from_sort_exprs), self.right_sort_exprs .as_ref() + .map(LexOrdering::iter) .map(PhysicalSortRequirement::from_sort_exprs), ] } @@ -1646,6 +1649,7 @@ mod tests { use datafusion_execution::config::SessionConfig; use datafusion_expr::Operator; use datafusion_physical_expr::expressions::{binary, col, lit, Column}; + use datafusion_physical_expr_common::sort_expr::{LexOrdering, PhysicalSortExpr}; use once_cell::sync::Lazy; use rstest::*; @@ -1746,7 +1750,7 @@ mod tests { let left_schema = &left_partition[0].schema(); let right_schema = &right_partition[0].schema(); - let left_sorted = vec![PhysicalSortExpr { + let left_sorted = LexOrdering::new(vec![PhysicalSortExpr { expr: binary( col("la1", left_schema)?, Operator::Plus, @@ -1754,11 +1758,11 @@ mod tests { left_schema, )?, options: SortOptions::default(), - }]; - let right_sorted = vec![PhysicalSortExpr { + }]); + let right_sorted = LexOrdering::new(vec![PhysicalSortExpr { expr: col("ra1", right_schema)?, options: SortOptions::default(), - }]; + }]); let (left, right) = create_memory_table( left_partition, right_partition, @@ -1825,14 +1829,14 @@ mod tests { let left_schema = &left_partition[0].schema(); let right_schema = &right_partition[0].schema(); - let left_sorted = vec![PhysicalSortExpr { + let left_sorted = LexOrdering::new(vec![PhysicalSortExpr { expr: col("la1", left_schema)?, options: SortOptions::default(), - }]; - let right_sorted = vec![PhysicalSortExpr { + }]); + let right_sorted = LexOrdering::new(vec![PhysicalSortExpr { expr: col("ra1", right_schema)?, options: SortOptions::default(), - }]; + }]); let (left, right) = create_memory_table( left_partition, right_partition, @@ -1968,20 +1972,20 @@ mod tests { let (left_partition, right_partition) = get_or_create_table((11, 21), 8)?; let left_schema = &left_partition[0].schema(); let right_schema = &right_partition[0].schema(); - let left_sorted = vec![PhysicalSortExpr { + let left_sorted = LexOrdering::new(vec![PhysicalSortExpr { expr: col("la1_des", left_schema)?, options: SortOptions { descending: true, nulls_first: true, }, - }]; - let right_sorted = vec![PhysicalSortExpr { + }]); + let right_sorted = LexOrdering::new(vec![PhysicalSortExpr { expr: col("ra1_des", right_schema)?, options: SortOptions { descending: true, nulls_first: true, }, - }]; + }]); let (left, right) = create_memory_table( left_partition, right_partition, @@ -2026,20 +2030,20 @@ mod tests { let (left_partition, right_partition) = get_or_create_table((10, 11), 8)?; let left_schema = &left_partition[0].schema(); let right_schema = &right_partition[0].schema(); - let left_sorted = vec![PhysicalSortExpr { + let left_sorted = LexOrdering::new(vec![PhysicalSortExpr { expr: 
col("l_asc_null_first", left_schema)?, options: SortOptions { descending: false, nulls_first: true, }, - }]; - let right_sorted = vec![PhysicalSortExpr { + }]); + let right_sorted = LexOrdering::new(vec![PhysicalSortExpr { expr: col("r_asc_null_first", right_schema)?, options: SortOptions { descending: false, nulls_first: true, }, - }]; + }]); let (left, right) = create_memory_table( left_partition, right_partition, @@ -2084,20 +2088,20 @@ mod tests { let left_schema = &left_partition[0].schema(); let right_schema = &right_partition[0].schema(); - let left_sorted = vec![PhysicalSortExpr { + let left_sorted = LexOrdering::new(vec![PhysicalSortExpr { expr: col("l_asc_null_last", left_schema)?, options: SortOptions { descending: false, nulls_first: false, }, - }]; - let right_sorted = vec![PhysicalSortExpr { + }]); + let right_sorted = LexOrdering::new(vec![PhysicalSortExpr { expr: col("r_asc_null_last", right_schema)?, options: SortOptions { descending: false, nulls_first: false, }, - }]; + }]); let (left, right) = create_memory_table( left_partition, right_partition, @@ -2144,20 +2148,20 @@ mod tests { let left_schema = &left_partition[0].schema(); let right_schema = &right_partition[0].schema(); - let left_sorted = vec![PhysicalSortExpr { + let left_sorted = LexOrdering::new(vec![PhysicalSortExpr { expr: col("l_desc_null_first", left_schema)?, options: SortOptions { descending: true, nulls_first: true, }, - }]; - let right_sorted = vec![PhysicalSortExpr { + }]); + let right_sorted = LexOrdering::new(vec![PhysicalSortExpr { expr: col("r_desc_null_first", right_schema)?, options: SortOptions { descending: true, nulls_first: true, }, - }]; + }]); let (left, right) = create_memory_table( left_partition, right_partition, @@ -2205,15 +2209,15 @@ mod tests { let left_schema = &left_partition[0].schema(); let right_schema = &right_partition[0].schema(); - let left_sorted = vec![PhysicalSortExpr { + let left_sorted = LexOrdering::new(vec![PhysicalSortExpr { expr: col("la1", left_schema)?, options: SortOptions::default(), - }]; + }]); - let right_sorted = vec![PhysicalSortExpr { + let right_sorted = LexOrdering::new(vec![PhysicalSortExpr { expr: col("ra1", right_schema)?, options: SortOptions::default(), - }]; + }]); let (left, right) = create_memory_table( left_partition, right_partition, @@ -2263,20 +2267,20 @@ mod tests { let left_schema = &left_partition[0].schema(); let right_schema = &right_partition[0].schema(); let left_sorted = vec![ - vec![PhysicalSortExpr { + LexOrdering::new(vec![PhysicalSortExpr { expr: col("la1", left_schema)?, options: SortOptions::default(), - }], - vec![PhysicalSortExpr { + }]), + LexOrdering::new(vec![PhysicalSortExpr { expr: col("la2", left_schema)?, options: SortOptions::default(), - }], + }]), ]; - let right_sorted = vec![PhysicalSortExpr { + let right_sorted = LexOrdering::new(vec![PhysicalSortExpr { expr: col("ra1", right_schema)?, options: SortOptions::default(), - }]; + }]); let (left, right) = create_memory_table( left_partition, @@ -2343,20 +2347,20 @@ mod tests { let left_schema = &left_partition[0].schema(); let right_schema = &right_partition[0].schema(); let on = vec![(col("lc1", left_schema)?, col("rc1", right_schema)?)]; - let left_sorted = vec![PhysicalSortExpr { + let left_sorted = LexOrdering::new(vec![PhysicalSortExpr { expr: col("lt1", left_schema)?, options: SortOptions { descending: false, nulls_first: true, }, - }]; - let right_sorted = vec![PhysicalSortExpr { + }]); + let right_sorted = LexOrdering::new(vec![PhysicalSortExpr { expr: 
col("rt1", right_schema)?, options: SortOptions { descending: false, nulls_first: true, }, - }]; + }]); let (left, right) = create_memory_table( left_partition, right_partition, @@ -2425,20 +2429,20 @@ mod tests { let left_schema = &left_partition[0].schema(); let right_schema = &right_partition[0].schema(); let on = vec![(col("lc1", left_schema)?, col("rc1", right_schema)?)]; - let left_sorted = vec![PhysicalSortExpr { + let left_sorted = LexOrdering::new(vec![PhysicalSortExpr { expr: col("li1", left_schema)?, options: SortOptions { descending: false, nulls_first: true, }, - }]; - let right_sorted = vec![PhysicalSortExpr { + }]); + let right_sorted = LexOrdering::new(vec![PhysicalSortExpr { expr: col("ri1", right_schema)?, options: SortOptions { descending: false, nulls_first: true, }, - }]; + }]); let (left, right) = create_memory_table( left_partition, right_partition, @@ -2500,14 +2504,14 @@ mod tests { let left_schema = &left_partition[0].schema(); let right_schema = &right_partition[0].schema(); - let left_sorted = vec![PhysicalSortExpr { + let left_sorted = LexOrdering::new(vec![PhysicalSortExpr { expr: col("l_float", left_schema)?, options: SortOptions::default(), - }]; - let right_sorted = vec![PhysicalSortExpr { + }]); + let right_sorted = LexOrdering::new(vec![PhysicalSortExpr { expr: col("r_float", right_schema)?, options: SortOptions::default(), - }]; + }]); let (left, right) = create_memory_table( left_partition, right_partition, diff --git a/datafusion/physical-plan/src/joins/test_utils.rs b/datafusion/physical-plan/src/joins/test_utils.rs index 090d60f0bac3..421fd0da808c 100644 --- a/datafusion/physical-plan/src/joins/test_utils.rs +++ b/datafusion/physical-plan/src/joins/test_utils.rs @@ -101,8 +101,10 @@ pub async fn partitioned_sym_join_with_filter( filter, join_type, null_equals_null, - left.output_ordering().map(|p| p.to_vec()), - right.output_ordering().map(|p| p.to_vec()), + left.output_ordering().map(|p| LexOrdering::new(p.to_vec())), + right + .output_ordering() + .map(|p| LexOrdering::new(p.to_vec())), StreamJoinPartitionMode::Partitioned, )?; diff --git a/datafusion/physical-plan/src/joins/utils.rs b/datafusion/physical-plan/src/joins/utils.rs index e7c191f9835e..d3fa37c2ac80 100644 --- a/datafusion/physical-plan/src/joins/utils.rs +++ b/datafusion/physical-plan/src/joins/utils.rs @@ -449,10 +449,10 @@ pub fn adjust_right_output_partitioning( /// the left column (zeroth index in the tuple) inside `right_ordering`. fn replace_on_columns_of_right_ordering( on_columns: &[(PhysicalExprRef, PhysicalExprRef)], - right_ordering: &mut [PhysicalSortExpr], + right_ordering: &mut LexOrdering, ) -> Result<()> { for (left_col, right_col) in on_columns { - for item in right_ordering.iter_mut() { + for item in right_ordering.inner.iter_mut() { let new_expr = Arc::clone(&item.expr) .transform(|e| { if e.eq(right_col) { @@ -472,7 +472,7 @@ fn offset_ordering( ordering: LexOrderingRef, join_type: &JoinType, offset: usize, -) -> Vec { +) -> LexOrdering { match join_type { // In the case below, right ordering should be offsetted with the left // side length, since we append the right table to the left table. 
@@ -483,7 +483,7 @@ fn offset_ordering( options: sort_expr.options, }) .collect(), - _ => ordering.to_vec(), + _ => LexOrdering::from_ref(ordering), } } @@ -503,15 +503,16 @@ pub fn calculate_join_output_ordering( if join_type == JoinType::Inner && probe_side == Some(JoinSide::Left) { replace_on_columns_of_right_ordering( on_columns, - &mut right_ordering.to_vec(), + &mut LexOrdering::from_ref(right_ordering), ) .ok()?; merge_vectors( left_ordering, - &offset_ordering(right_ordering, &join_type, left_columns_len), + offset_ordering(right_ordering, &join_type, left_columns_len) + .as_ref(), ) } else { - left_ordering.to_vec() + LexOrdering::from_ref(left_ordering) } } [false, true] => { @@ -519,11 +520,12 @@ pub fn calculate_join_output_ordering( if join_type == JoinType::Inner && probe_side == Some(JoinSide::Right) { replace_on_columns_of_right_ordering( on_columns, - &mut right_ordering.to_vec(), + &mut LexOrdering::from_ref(right_ordering), ) .ok()?; merge_vectors( - &offset_ordering(right_ordering, &join_type, left_columns_len), + offset_ordering(right_ordering, &join_type, left_columns_len) + .as_ref(), left_ordering, ) } else { @@ -2600,7 +2602,7 @@ mod tests { #[test] fn test_calculate_join_output_ordering() -> Result<()> { let options = SortOptions::default(); - let left_ordering = vec![ + let left_ordering = LexOrdering::new(vec![ PhysicalSortExpr { expr: Arc::new(Column::new("a", 0)), options, @@ -2613,8 +2615,8 @@ mod tests { expr: Arc::new(Column::new("d", 3)), options, }, - ]; - let right_ordering = vec![ + ]); + let right_ordering = LexOrdering::new(vec![ PhysicalSortExpr { expr: Arc::new(Column::new("z", 2)), options, @@ -2623,7 +2625,7 @@ mod tests { expr: Arc::new(Column::new("y", 1)), options, }, - ]; + ]); let join_type = JoinType::Inner; let on_columns = [( Arc::new(Column::new("b", 1)) as _, @@ -2634,7 +2636,7 @@ mod tests { let probe_sides = [Some(JoinSide::Left), Some(JoinSide::Right)]; let expected = [ - Some(vec![ + Some(LexOrdering::new(vec![ PhysicalSortExpr { expr: Arc::new(Column::new("a", 0)), options, @@ -2655,8 +2657,8 @@ mod tests { expr: Arc::new(Column::new("y", 6)), options, }, - ]), - Some(vec![ + ])), + Some(LexOrdering::new(vec![ PhysicalSortExpr { expr: Arc::new(Column::new("z", 7)), options, @@ -2677,7 +2679,7 @@ mod tests { expr: Arc::new(Column::new("d", 3)), options, }, - ]), + ])), ]; for (i, (maintains_input_order, probe_side)) in @@ -2685,8 +2687,8 @@ mod tests { { assert_eq!( calculate_join_output_ordering( - &left_ordering, - &right_ordering, + left_ordering.as_ref(), + right_ordering.as_ref(), join_type, &on_columns, left_columns_len, diff --git a/datafusion/physical-plan/src/memory.rs b/datafusion/physical-plan/src/memory.rs index dd4868d1bfcc..56ed144845a0 100644 --- a/datafusion/physical-plan/src/memory.rs +++ b/datafusion/physical-plan/src/memory.rs @@ -22,7 +22,6 @@ use std::fmt; use std::sync::Arc; use std::task::{Context, Poll}; -use super::expressions::PhysicalSortExpr; use super::{ common, DisplayAs, DisplayFormatType, ExecutionMode, ExecutionPlan, Partitioning, PlanProperties, RecordBatchStream, SendableRecordBatchStream, Statistics, @@ -79,10 +78,7 @@ impl DisplayAs for MemoryExec { .sort_information .first() .map(|output_ordering| { - format!( - ", output_ordering={}", - PhysicalSortExpr::format_list(output_ordering) - ) + format!(", output_ordering={}", output_ordering) }) .unwrap_or_default(); @@ -216,7 +212,7 @@ impl MemoryExec { let fields = self.schema.fields(); let ambiguous_column = sort_information .iter() - .flatten() + 
.flat_map(|ordering| ordering.inner.clone()) .flat_map(|expr| collect_columns(&expr.expr)) .find(|col| { fields @@ -365,6 +361,7 @@ mod tests { use arrow_schema::{DataType, Field, Schema, SortOptions}; use datafusion_physical_expr::expressions::col; use datafusion_physical_expr::PhysicalSortExpr; + use datafusion_physical_expr_common::sort_expr::LexOrdering; #[test] fn test_memory_order_eq() -> datafusion_common::Result<()> { @@ -373,7 +370,7 @@ mod tests { Field::new("b", DataType::Int64, false), Field::new("c", DataType::Int64, false), ])); - let sort1 = vec![ + let sort1 = LexOrdering::new(vec![ PhysicalSortExpr { expr: col("a", &schema)?, options: SortOptions::default(), @@ -382,12 +379,12 @@ mod tests { expr: col("b", &schema)?, options: SortOptions::default(), }, - ]; - let sort2 = vec![PhysicalSortExpr { + ]); + let sort2 = LexOrdering::new(vec![PhysicalSortExpr { expr: col("c", &schema)?, options: SortOptions::default(), - }]; - let mut expected_output_order = vec![]; + }]); + let mut expected_output_order = LexOrdering::default(); expected_output_order.extend(sort1.clone()); expected_output_order.extend(sort2.clone()); @@ -396,8 +393,8 @@ mod tests { .try_with_sort_information(sort_information)?; assert_eq!( - mem_exec.properties().output_ordering().unwrap(), - expected_output_order + mem_exec.properties().output_ordering().unwrap().to_vec(), + expected_output_order.inner ); let eq_properties = mem_exec.properties().equivalence_properties(); assert!(eq_properties.oeq_class().contains(&sort1)); diff --git a/datafusion/physical-plan/src/repartition/mod.rs b/datafusion/physical-plan/src/repartition/mod.rs index 601c1e873152..06144f98c89d 100644 --- a/datafusion/physical-plan/src/repartition/mod.rs +++ b/datafusion/physical-plan/src/repartition/mod.rs @@ -47,9 +47,10 @@ use datafusion_common::{not_impl_err, DataFusionError, Result}; use datafusion_common_runtime::SpawnedTask; use datafusion_execution::memory_pool::MemoryConsumer; use datafusion_execution::TaskContext; -use datafusion_physical_expr::{EquivalenceProperties, PhysicalExpr, PhysicalSortExpr}; +use datafusion_physical_expr::{EquivalenceProperties, PhysicalExpr}; use crate::execution_plan::CardinalityEffect; +use datafusion_physical_expr_common::sort_expr::{LexOrdering, PhysicalSortExpr}; use futures::stream::Stream; use futures::{FutureExt, StreamExt, TryStreamExt}; use hashbrown::HashMap; @@ -502,11 +503,7 @@ impl DisplayAs for RepartitionExec { } if let Some(sort_exprs) = self.sort_exprs() { - write!( - f, - ", sort_exprs={}", - PhysicalSortExpr::format_list(sort_exprs) - )?; + write!(f, ", sort_exprs={}", LexOrdering::from_ref(sort_exprs))?; } Ok(()) } @@ -1561,10 +1558,10 @@ mod tests { mod test { use arrow_schema::{DataType, Field, Schema, SortOptions}; - use datafusion_physical_expr::expressions::col; - use crate::memory::MemoryExec; use crate::union::UnionExec; + use datafusion_physical_expr::expressions::col; + use datafusion_physical_expr_common::sort_expr::{LexOrdering, PhysicalSortExpr}; use super::*; @@ -1659,12 +1656,12 @@ mod test { Arc::new(Schema::new(vec![Field::new("c0", DataType::UInt32, false)])) } - fn sort_exprs(schema: &Schema) -> Vec { + fn sort_exprs(schema: &Schema) -> LexOrdering { let options = SortOptions::default(); - vec![PhysicalSortExpr { + LexOrdering::new(vec![PhysicalSortExpr { expr: col("c0", schema).unwrap(), options, - }] + }]) } fn memory_exec(schema: &SchemaRef) -> Arc { @@ -1673,7 +1670,7 @@ mod test { fn sorted_memory_exec( schema: &SchemaRef, - sort_exprs: Vec, + sort_exprs: 
LexOrdering, ) -> Arc { Arc::new( MemoryExec::try_new(&[vec![]], Arc::clone(schema), None) diff --git a/datafusion/physical-plan/src/sorts/partial_sort.rs b/datafusion/physical-plan/src/sorts/partial_sort.rs index 649c05d52e8b..8f853464c9bd 100644 --- a/datafusion/physical-plan/src/sorts/partial_sort.rs +++ b/datafusion/physical-plan/src/sorts/partial_sort.rs @@ -57,7 +57,6 @@ use std::pin::Pin; use std::sync::Arc; use std::task::{Context, Poll}; -use crate::expressions::PhysicalSortExpr; use crate::metrics::{BaselineMetrics, ExecutionPlanMetricsSet, MetricsSet}; use crate::sorts::sort::sort_batch; use crate::{ @@ -73,6 +72,7 @@ use datafusion_common::Result; use datafusion_execution::{RecordBatchStream, TaskContext}; use datafusion_physical_expr::LexOrdering; +use datafusion_physical_expr_common::sort_expr::LexOrderingRef; use futures::{ready, Stream, StreamExt}; use log::trace; @@ -82,7 +82,7 @@ pub struct PartialSortExec { /// Input schema pub(crate) input: Arc, /// Sort expressions - expr: Vec, + expr: LexOrdering, /// Length of continuous matching columns of input that satisfy /// the required ordering for the sort common_prefix_length: usize, @@ -100,7 +100,7 @@ pub struct PartialSortExec { impl PartialSortExec { /// Create a new partial sort execution plan pub fn new( - expr: Vec, + expr: LexOrdering, input: Arc, common_prefix_length: usize, ) -> Self { @@ -159,8 +159,8 @@ impl PartialSortExec { } /// Sort expressions - pub fn expr(&self) -> &[PhysicalSortExpr] { - &self.expr + pub fn expr(&self) -> LexOrderingRef { + self.expr.as_ref() } /// If `Some(fetch)`, limits output to only the first "fetch" items @@ -212,13 +212,12 @@ impl DisplayAs for PartialSortExec { ) -> std::fmt::Result { match t { DisplayFormatType::Default | DisplayFormatType::Verbose => { - let expr = PhysicalSortExpr::format_list(&self.expr); let common_prefix_length = self.common_prefix_length; match self.fetch { Some(fetch) => { - write!(f, "PartialSortExec: TopK(fetch={fetch}), expr=[{expr}], common_prefix_length=[{common_prefix_length}]", ) + write!(f, "PartialSortExec: TopK(fetch={fetch}), expr=[{}], common_prefix_length=[{common_prefix_length}]", self.expr) } - None => write!(f, "PartialSortExec: expr=[{expr}], common_prefix_length=[{common_prefix_length}]"), + None => write!(f, "PartialSortExec: expr=[{}], common_prefix_length=[{common_prefix_length}]", self.expr), } } } @@ -315,7 +314,7 @@ struct PartialSortStream { /// The input plan input: SendableRecordBatchStream, /// Sort expressions - expr: Vec, + expr: LexOrdering, /// Length of prefix common to input ordering and required ordering of plan /// should be more than 0 otherwise PartialSort is not applicable common_prefix_length: usize, @@ -394,7 +393,7 @@ impl PartialSortStream { fn sort_in_mem_batches(self: &mut Pin<&mut Self>) -> Result { let input_batch = concat_batches(&self.schema(), &self.in_mem_batches)?; self.in_mem_batches.clear(); - let result = sort_batch(&input_batch, &self.expr, self.fetch)?; + let result = sort_batch(&input_batch, self.expr.as_ref(), self.fetch)?; if let Some(remaining_fetch) = self.fetch { // remaining_fetch - result.num_rows() is always be >= 0 // because result length of sort_batch with limit cannot be @@ -448,6 +447,7 @@ mod tests { use crate::collect; use crate::expressions::col; + use crate::expressions::PhysicalSortExpr; use crate::memory::MemoryExec; use crate::sorts::sort::SortExec; use crate::test; @@ -475,7 +475,7 @@ mod tests { }; let partial_sort_exec = Arc::new(PartialSortExec::new( - vec![ + 
LexOrdering::new(vec![ PhysicalSortExpr { expr: col("a", &schema)?, options: option_asc, @@ -488,7 +488,7 @@ mod tests { expr: col("c", &schema)?, options: option_asc, }, - ], + ]), Arc::clone(&source), 2, )) as Arc; @@ -539,7 +539,7 @@ mod tests { for common_prefix_length in [1, 2] { let partial_sort_exec = Arc::new( PartialSortExec::new( - vec![ + LexOrdering::new(vec![ PhysicalSortExpr { expr: col("a", &schema)?, options: option_asc, @@ -552,7 +552,7 @@ mod tests { expr: col("c", &schema)?, options: option_asc, }, - ], + ]), Arc::clone(&source), common_prefix_length, ) @@ -611,7 +611,7 @@ mod tests { [(1, &source_tables[0]), (2, &source_tables[1])] { let partial_sort_exec = Arc::new(PartialSortExec::new( - vec![ + LexOrdering::new(vec![ PhysicalSortExpr { expr: col("a", &schema)?, options: option_asc, @@ -624,7 +624,7 @@ mod tests { expr: col("c", &schema)?, options: option_asc, }, - ], + ]), Arc::clone(source), common_prefix_length, )); @@ -701,7 +701,7 @@ mod tests { }; let schema = mem_exec.schema(); let partial_sort_executor = PartialSortExec::new( - vec![ + LexOrdering::new(vec![ PhysicalSortExpr { expr: col("a", &schema)?, options: option_asc, @@ -714,7 +714,7 @@ mod tests { expr: col("c", &schema)?, options: option_asc, }, - ], + ]), Arc::clone(&mem_exec), 1, ); @@ -762,7 +762,7 @@ mod tests { (Some(250), vec![0, 125, 125]), ] { let partial_sort_executor = PartialSortExec::new( - vec![ + LexOrdering::new(vec![ PhysicalSortExpr { expr: col("a", &schema)?, options: option_asc, @@ -775,7 +775,7 @@ mod tests { expr: col("c", &schema)?, options: option_asc, }, - ], + ]), Arc::clone(&mem_exec), 1, ) @@ -834,10 +834,10 @@ mod tests { )?); let partial_sort_exec = Arc::new(PartialSortExec::new( - vec![PhysicalSortExpr { + LexOrdering::new(vec![PhysicalSortExpr { expr: col("field_name", &schema)?, options: SortOptions::default(), - }], + }]), input, 1, )); @@ -923,7 +923,7 @@ mod tests { )?; let partial_sort_exec = Arc::new(PartialSortExec::new( - vec![ + LexOrdering::new(vec![ PhysicalSortExpr { expr: col("a", &schema)?, options: option_asc, @@ -936,7 +936,7 @@ mod tests { expr: col("c", &schema)?, options: option_desc, }, - ], + ]), Arc::new(MemoryExec::try_new(&[vec![batch]], schema, None)?), 2, )); @@ -1000,10 +1000,10 @@ mod tests { let blocking_exec = Arc::new(BlockingExec::new(Arc::clone(&schema), 1)); let refs = blocking_exec.refs(); let sort_exec = Arc::new(PartialSortExec::new( - vec![PhysicalSortExpr { + LexOrdering::new(vec![PhysicalSortExpr { expr: col("a", &schema)?, options: SortOptions::default(), - }], + }]), blocking_exec, 1, )); diff --git a/datafusion/physical-plan/src/sorts/sort.rs b/datafusion/physical-plan/src/sorts/sort.rs index 921678a4ad92..32d6d3e0073c 100644 --- a/datafusion/physical-plan/src/sorts/sort.rs +++ b/datafusion/physical-plan/src/sorts/sort.rs @@ -52,7 +52,9 @@ use datafusion_execution::memory_pool::{MemoryConsumer, MemoryReservation}; use datafusion_execution::runtime_env::RuntimeEnv; use datafusion_execution::TaskContext; use datafusion_physical_expr::LexOrdering; -use datafusion_physical_expr_common::sort_expr::PhysicalSortRequirement; +use datafusion_physical_expr_common::sort_expr::{ + LexOrderingRef, PhysicalSortRequirement, +}; use crate::execution_plan::CardinalityEffect; use futures::{StreamExt, TryStreamExt}; @@ -243,7 +245,7 @@ impl ExternalSorter { pub fn new( partition_id: usize, schema: SchemaRef, - expr: Vec, + expr: LexOrdering, batch_size: usize, fetch: Option, sort_spill_reservation_bytes: usize, @@ -265,7 +267,7 @@ impl 
ExternalSorter { in_mem_batches: vec![], in_mem_batches_sorted: true, spills: vec![], - expr: expr.into(), + expr: expr.inner.into(), metrics, fetch, reservation, @@ -345,7 +347,7 @@ impl ExternalSorter { StreamingMergeBuilder::new() .with_streams(streams) .with_schema(Arc::clone(&self.schema)) - .with_expressions(&self.expr) + .with_expressions(self.expr.to_vec().as_slice()) .with_metrics(self.metrics.baseline.clone()) .with_batch_size(self.batch_size) .with_fetch(self.fetch) @@ -537,7 +539,7 @@ impl ExternalSorter { StreamingMergeBuilder::new() .with_streams(streams) .with_schema(Arc::clone(&self.schema)) - .with_expressions(&self.expr) + .with_expressions(self.expr.as_ref()) .with_metrics(metrics) .with_batch_size(self.batch_size) .with_fetch(self.fetch) @@ -601,7 +603,7 @@ impl Debug for ExternalSorter { pub fn sort_batch( batch: &RecordBatch, - expressions: &[PhysicalSortExpr], + expressions: LexOrderingRef, fetch: Option, ) -> Result { let sort_columns = expressions @@ -678,7 +680,7 @@ pub struct SortExec { /// Input schema pub(crate) input: Arc, /// Sort expressions - expr: Vec, + expr: LexOrdering, /// Containing all metrics set created during sort metrics_set: ExecutionPlanMetricsSet, /// Preserve partitions of input plan. If false, the input partitions @@ -693,7 +695,7 @@ pub struct SortExec { impl SortExec { /// Create a new sort execution plan that produces a single, /// sorted output partition. - pub fn new(expr: Vec, input: Arc) -> Self { + pub fn new(expr: LexOrdering, input: Arc) -> Self { let preserve_partitioning = false; let cache = Self::compute_properties(&input, expr.clone(), preserve_partitioning); Self { @@ -760,8 +762,8 @@ impl SortExec { } /// Sort expressions - pub fn expr(&self) -> &[PhysicalSortExpr] { - &self.expr + pub fn expr(&self) -> LexOrderingRef { + self.expr.as_ref() } /// If `Some(fetch)`, limits output to only the first "fetch" items @@ -818,13 +820,12 @@ impl DisplayAs for SortExec { fn fmt_as(&self, t: DisplayFormatType, f: &mut Formatter) -> fmt::Result { match t { DisplayFormatType::Default | DisplayFormatType::Verbose => { - let expr = PhysicalSortExpr::format_list(&self.expr); let preserve_partitioning = self.preserve_partitioning; match self.fetch { Some(fetch) => { - write!(f, "SortExec: TopK(fetch={fetch}), expr=[{expr}], preserve_partitioning=[{preserve_partitioning}]",) + write!(f, "SortExec: TopK(fetch={fetch}), expr=[{}], preserve_partitioning=[{preserve_partitioning}]", self.expr) } - None => write!(f, "SortExec: expr=[{expr}], preserve_partitioning=[{preserve_partitioning}]"), + None => write!(f, "SortExec: expr=[{}], preserve_partitioning=[{preserve_partitioning}]", self.expr), } } } @@ -1027,9 +1028,9 @@ mod tests { impl SortedUnboundedExec { fn compute_properties(schema: SchemaRef) -> PlanProperties { let mut eq_properties = EquivalenceProperties::new(schema); - eq_properties.add_new_orderings(vec![vec![PhysicalSortExpr::new_default( - Arc::new(Column::new("c1", 0)), - )]]); + eq_properties.add_new_orderings(vec![LexOrdering::new(vec![ + PhysicalSortExpr::new_default(Arc::new(Column::new("c1", 0))), + ])]); let mode = ExecutionMode::Unbounded; PlanProperties::new(eq_properties, Partitioning::UnknownPartitioning(1), mode) } @@ -1123,10 +1124,10 @@ mod tests { let schema = csv.schema(); let sort_exec = Arc::new(SortExec::new( - vec![PhysicalSortExpr { + LexOrdering::new(vec![PhysicalSortExpr { expr: col("i", &schema)?, options: SortOptions::default(), - }], + }]), Arc::new(CoalescePartitionsExec::new(csv)), )); @@ -1166,10 +1167,10 @@ 
mod tests { let schema = input.schema(); let sort_exec = Arc::new(SortExec::new( - vec![PhysicalSortExpr { + LexOrdering::new(vec![PhysicalSortExpr { expr: col("i", &schema)?, options: SortOptions::default(), - }], + }]), Arc::new(CoalescePartitionsExec::new(input)), )); @@ -1245,10 +1246,10 @@ mod tests { let sort_exec = Arc::new( SortExec::new( - vec![PhysicalSortExpr { + LexOrdering::new(vec![PhysicalSortExpr { expr: col("i", &schema)?, options: SortOptions::default(), - }], + }]), Arc::new(CoalescePartitionsExec::new(csv)), ) .with_fetch(fetch), @@ -1294,10 +1295,10 @@ mod tests { ); let sort_exec = Arc::new(SortExec::new( - vec![PhysicalSortExpr { + LexOrdering::new(vec![PhysicalSortExpr { expr: col("field_name", &schema)?, options: SortOptions::default(), - }], + }]), input, )); @@ -1345,7 +1346,7 @@ mod tests { )?; let sort_exec = Arc::new(SortExec::new( - vec![ + LexOrdering::new(vec![ PhysicalSortExpr { expr: col("a", &schema)?, options: SortOptions { @@ -1360,7 +1361,7 @@ mod tests { nulls_first: false, }, }, - ], + ]), Arc::new(MemoryExec::try_new( &[vec![batch]], Arc::clone(&schema), @@ -1435,7 +1436,7 @@ mod tests { )?; let sort_exec = Arc::new(SortExec::new( - vec![ + LexOrdering::new(vec![ PhysicalSortExpr { expr: col("a", &schema)?, options: SortOptions { @@ -1450,7 +1451,7 @@ mod tests { nulls_first: false, }, }, - ], + ]), Arc::new(MemoryExec::try_new(&[vec![batch]], schema, None)?), )); @@ -1514,10 +1515,10 @@ mod tests { let blocking_exec = Arc::new(BlockingExec::new(Arc::clone(&schema), 1)); let refs = blocking_exec.refs(); let sort_exec = Arc::new(SortExec::new( - vec![PhysicalSortExpr { + LexOrdering::new(vec![PhysicalSortExpr { expr: col("a", &schema)?, options: SortOptions::default(), - }], + }]), blocking_exec, )); @@ -1545,12 +1546,12 @@ mod tests { RecordBatch::try_new_with_options(Arc::clone(&schema), vec![], &options) .unwrap(); - let expressions = vec![PhysicalSortExpr { + let expressions = LexOrdering::new(vec![PhysicalSortExpr { expr: Arc::new(Literal::new(ScalarValue::Int64(Some(1)))), options: SortOptions::default(), - }]; + }]); - let result = sort_batch(&batch, &expressions, None).unwrap(); + let result = sort_batch(&batch, expressions.as_ref(), None).unwrap(); assert_eq!(result.num_rows(), 1); } @@ -1564,9 +1565,9 @@ mod tests { cache: SortedUnboundedExec::compute_properties(Arc::new(schema.clone())), }; let mut plan = SortExec::new( - vec![PhysicalSortExpr::new_default(Arc::new(Column::new( + LexOrdering::new(vec![PhysicalSortExpr::new_default(Arc::new(Column::new( "c1", 0, - )))], + )))]), Arc::new(source), ); plan = plan.with_fetch(Some(9)); diff --git a/datafusion/physical-plan/src/sorts/sort_preserving_merge.rs b/datafusion/physical-plan/src/sorts/sort_preserving_merge.rs index f17161306c7a..ae39cfe412ba 100644 --- a/datafusion/physical-plan/src/sorts/sort_preserving_merge.rs +++ b/datafusion/physical-plan/src/sorts/sort_preserving_merge.rs @@ -21,7 +21,6 @@ use std::any::Any; use std::sync::Arc; use crate::common::spawn_buffered; -use crate::expressions::PhysicalSortExpr; use crate::limit::LimitStream; use crate::metrics::{BaselineMetrics, ExecutionPlanMetricsSet, MetricsSet}; use crate::sorts::streaming_merge::StreamingMergeBuilder; @@ -35,7 +34,9 @@ use datafusion_execution::memory_pool::MemoryConsumer; use datafusion_execution::TaskContext; use datafusion_physical_expr::PhysicalSortRequirement; -use datafusion_physical_expr_common::sort_expr::LexRequirement; +use datafusion_physical_expr_common::sort_expr::{ + LexOrdering, LexOrderingRef, 
LexRequirement, +}; use log::{debug, trace}; /// Sort preserving merge execution plan @@ -75,7 +76,7 @@ pub struct SortPreservingMergeExec { /// Input plan input: Arc, /// Sort expressions - expr: Vec, + expr: LexOrdering, /// Execution metrics metrics: ExecutionPlanMetricsSet, /// Optional number of rows to fetch. Stops producing rows after this fetch @@ -88,7 +89,7 @@ pub struct SortPreservingMergeExec { impl SortPreservingMergeExec { /// Create a new sort execution plan - pub fn new(expr: Vec, input: Arc) -> Self { + pub fn new(expr: LexOrdering, input: Arc) -> Self { let cache = Self::compute_properties(&input, expr.clone()); Self { input, @@ -121,7 +122,7 @@ impl SortPreservingMergeExec { } /// Sort expressions - pub fn expr(&self) -> &[PhysicalSortExpr] { + pub fn expr(&self) -> LexOrderingRef { &self.expr } @@ -133,7 +134,7 @@ impl SortPreservingMergeExec { /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. fn compute_properties( input: &Arc, - ordering: Vec, + ordering: LexOrdering, ) -> PlanProperties { let mut eq_properties = input.equivalence_properties().clone(); eq_properties.clear_per_partition_constants(); @@ -154,11 +155,7 @@ impl DisplayAs for SortPreservingMergeExec { ) -> std::fmt::Result { match t { DisplayFormatType::Default | DisplayFormatType::Verbose => { - write!( - f, - "SortPreservingMergeExec: [{}]", - PhysicalSortExpr::format_list(&self.expr) - )?; + write!(f, "SortPreservingMergeExec: [{}]", self.expr)?; if let Some(fetch) = self.fetch { write!(f, ", fetch={fetch}")?; }; @@ -208,7 +205,9 @@ impl ExecutionPlan for SortPreservingMergeExec { } fn required_input_ordering(&self) -> Vec> { - vec![Some(PhysicalSortRequirement::from_sort_exprs(&self.expr))] + vec![Some(PhysicalSortRequirement::from_sort_exprs( + self.expr.iter(), + ))] } fn maintains_input_order(&self) -> Vec { @@ -290,7 +289,7 @@ impl ExecutionPlan for SortPreservingMergeExec { let result = StreamingMergeBuilder::new() .with_streams(receivers) .with_schema(schema) - .with_expressions(&self.expr) + .with_expressions(self.expr.as_ref()) .with_metrics(BaselineMetrics::new(&self.metrics, partition)) .with_batch_size(context.session_config().batch_size()) .with_fetch(self.fetch) @@ -354,6 +353,7 @@ mod tests { use datafusion_physical_expr::EquivalenceProperties; use datafusion_physical_expr_common::physical_expr::PhysicalExpr; + use datafusion_physical_expr_common::sort_expr::PhysicalSortExpr; use futures::{FutureExt, Stream, StreamExt}; use tokio::time::timeout; @@ -384,7 +384,7 @@ mod tests { let rbs = (0..1024).map(|_| rb.clone()).collect::>(); let schema = rb.schema(); - let sort = vec![ + let sort = LexOrdering::new(vec![ PhysicalSortExpr { expr: col("b", &schema).unwrap(), options: Default::default(), @@ -393,7 +393,7 @@ mod tests { expr: col("c", &schema).unwrap(), options: Default::default(), }, - ]; + ]); let exec = MemoryExec::try_new(&[rbs], schema, None).unwrap(); let repartition_exec = @@ -486,7 +486,7 @@ mod tests { let batch = RecordBatch::try_from_iter(vec![("a", a)]).unwrap(); let schema = batch.schema(); - let sort = vec![]; // no sort expressions + let sort = LexOrdering::default(); // no sort expressions let exec = MemoryExec::try_new(&[vec![batch.clone()], vec![batch]], schema, None) .unwrap(); let merge = Arc::new(SortPreservingMergeExec::new(sort, Arc::new(exec))); @@ -665,7 +665,7 @@ mod tests { context: Arc, ) { let schema = partitions[0][0].schema(); - let sort = vec![ + let sort = 
LexOrdering::new(vec![ PhysicalSortExpr { expr: col("b", &schema).unwrap(), options: Default::default(), @@ -674,7 +674,7 @@ mod tests { expr: col("c", &schema).unwrap(), options: Default::default(), }, - ]; + ]); let exec = MemoryExec::try_new(partitions, schema, None).unwrap(); let merge = Arc::new(SortPreservingMergeExec::new(sort, Arc::new(exec))); @@ -684,7 +684,7 @@ mod tests { async fn sorted_merge( input: Arc, - sort: Vec, + sort: LexOrdering, context: Arc, ) -> RecordBatch { let merge = Arc::new(SortPreservingMergeExec::new(sort, input)); @@ -695,7 +695,7 @@ mod tests { async fn partition_sort( input: Arc, - sort: Vec, + sort: LexOrdering, context: Arc, ) -> RecordBatch { let sort_exec = @@ -705,7 +705,7 @@ mod tests { async fn basic_sort( src: Arc, - sort: Vec, + sort: LexOrdering, context: Arc, ) -> RecordBatch { let merge = Arc::new(CoalescePartitionsExec::new(src)); @@ -722,13 +722,13 @@ mod tests { let csv = test::scan_partitioned(partitions); let schema = csv.schema(); - let sort = vec![PhysicalSortExpr { + let sort = LexOrdering::new(vec![PhysicalSortExpr { expr: col("i", &schema).unwrap(), options: SortOptions { descending: true, nulls_first: true, }, - }]; + }]); let basic = basic_sort(Arc::clone(&csv), sort.clone(), Arc::clone(&task_ctx)).await; @@ -773,7 +773,7 @@ mod tests { } async fn sorted_partitioned_input( - sort: Vec, + sort: LexOrdering, sizes: &[usize], context: Arc, ) -> Result> { @@ -792,10 +792,10 @@ mod tests { async fn test_partition_sort_streaming_input() -> Result<()> { let task_ctx = Arc::new(TaskContext::default()); let schema = make_partition(11).schema(); - let sort = vec![PhysicalSortExpr { + let sort = LexOrdering::new(vec![PhysicalSortExpr { expr: col("i", &schema).unwrap(), options: Default::default(), - }]; + }]); let input = sorted_partitioned_input(sort.clone(), &[10, 3, 11], Arc::clone(&task_ctx)) @@ -822,10 +822,10 @@ mod tests { #[tokio::test] async fn test_partition_sort_streaming_input_output() -> Result<()> { let schema = make_partition(11).schema(); - let sort = vec![PhysicalSortExpr { + let sort = LexOrdering::new(vec![PhysicalSortExpr { expr: col("i", &schema).unwrap(), options: Default::default(), - }]; + }]); // Test streaming with default batch size let task_ctx = Arc::new(TaskContext::default()); @@ -897,7 +897,7 @@ mod tests { let b2 = RecordBatch::try_from_iter(vec![("a", a), ("b", b), ("c", c)]).unwrap(); let schema = b1.schema(); - let sort = vec![ + let sort = LexOrdering::new(vec![ PhysicalSortExpr { expr: col("b", &schema).unwrap(), options: SortOptions { @@ -912,7 +912,7 @@ mod tests { nulls_first: false, }, }, - ]; + ]); let exec = MemoryExec::try_new(&[vec![b1], vec![b2]], schema, None).unwrap(); let merge = Arc::new(SortPreservingMergeExec::new(sort, Arc::new(exec))); @@ -948,13 +948,13 @@ mod tests { let batch = RecordBatch::try_from_iter(vec![("a", a), ("b", b)]).unwrap(); let schema = batch.schema(); - let sort = vec![PhysicalSortExpr { + let sort = LexOrdering::new(vec![PhysicalSortExpr { expr: col("b", &schema).unwrap(), options: SortOptions { descending: false, nulls_first: true, }, - }]; + }]); let exec = MemoryExec::try_new(&[vec![batch]], schema, None).unwrap(); let merge = Arc::new( SortPreservingMergeExec::new(sort, Arc::new(exec)).with_fetch(Some(2)), @@ -984,13 +984,13 @@ mod tests { let batch = RecordBatch::try_from_iter(vec![("a", a), ("b", b)]).unwrap(); let schema = batch.schema(); - let sort = vec![PhysicalSortExpr { + let sort = LexOrdering::new(vec![PhysicalSortExpr { expr: col("b", 
&schema).unwrap(), options: SortOptions { descending: false, nulls_first: true, }, - }]; + }]); let exec = MemoryExec::try_new(&[vec![batch]], schema, None).unwrap(); let merge = Arc::new(SortPreservingMergeExec::new(sort, Arc::new(exec))); @@ -1017,10 +1017,10 @@ mod tests { async fn test_async() -> Result<()> { let task_ctx = Arc::new(TaskContext::default()); let schema = make_partition(11).schema(); - let sort = vec![PhysicalSortExpr { + let sort = LexOrdering::new(vec![PhysicalSortExpr { expr: col("i", &schema).unwrap(), options: SortOptions::default(), - }]; + }]); let batches = sorted_partitioned_input(sort.clone(), &[5, 7, 3], Arc::clone(&task_ctx)) @@ -1056,7 +1056,7 @@ mod tests { let merge_stream = StreamingMergeBuilder::new() .with_streams(streams) .with_schema(batches.schema()) - .with_expressions(sort.as_slice()) + .with_expressions(sort.as_ref()) .with_metrics(BaselineMetrics::new(&metrics, 0)) .with_batch_size(task_ctx.session_config().batch_size()) .with_fetch(fetch) @@ -1096,10 +1096,10 @@ mod tests { let b2 = RecordBatch::try_from_iter(vec![("a", a), ("b", b)]).unwrap(); let schema = b1.schema(); - let sort = vec![PhysicalSortExpr { + let sort = LexOrdering::new(vec![PhysicalSortExpr { expr: col("b", &schema).unwrap(), options: Default::default(), - }]; + }]); let exec = MemoryExec::try_new(&[vec![b1], vec![b2]], schema, None).unwrap(); let merge = Arc::new(SortPreservingMergeExec::new(sort, Arc::new(exec))); @@ -1155,10 +1155,10 @@ mod tests { let blocking_exec = Arc::new(BlockingExec::new(Arc::clone(&schema), 2)); let refs = blocking_exec.refs(); let sort_preserving_merge_exec = Arc::new(SortPreservingMergeExec::new( - vec![PhysicalSortExpr { + LexOrdering::new(vec![PhysicalSortExpr { expr: col("a", &schema)?, options: SortOptions::default(), - }], + }]), blocking_exec, )); @@ -1203,13 +1203,13 @@ mod tests { let schema = partitions[0][0].schema(); - let sort = vec![PhysicalSortExpr { + let sort = LexOrdering::new(vec![PhysicalSortExpr { expr: col("value", &schema).unwrap(), options: SortOptions { descending: false, nulls_first: true, }, - }]; + }]); let exec = MemoryExec::try_new(&partitions, schema, None).unwrap(); let merge = Arc::new(SortPreservingMergeExec::new(sort, Arc::new(exec))); @@ -1272,7 +1272,7 @@ mod tests { eq_properties.add_new_orderings(vec![columns .iter() .map(|expr| PhysicalSortExpr::new_default(Arc::clone(expr))) - .collect::>()]); + .collect::()]); let mode = ExecutionMode::Unbounded; PlanProperties::new(eq_properties, Partitioning::Hash(columns, 3), mode) } @@ -1381,9 +1381,9 @@ mod tests { congestion_cleared: Arc::new(Mutex::new(false)), }; let spm = SortPreservingMergeExec::new( - vec![PhysicalSortExpr::new_default(Arc::new(Column::new( + LexOrdering::new(vec![PhysicalSortExpr::new_default(Arc::new(Column::new( "c1", 0, - )))], + )))]), Arc::new(source), ); let spm_task = SpawnedTask::spawn(collect(Arc::new(spm), task_ctx)); diff --git a/datafusion/physical-plan/src/sorts/stream.rs b/datafusion/physical-plan/src/sorts/stream.rs index c7924edfb1eb..70beb2c4a91b 100644 --- a/datafusion/physical-plan/src/sorts/stream.rs +++ b/datafusion/physical-plan/src/sorts/stream.rs @@ -24,6 +24,7 @@ use arrow::record_batch::RecordBatch; use arrow::row::{RowConverter, SortField}; use datafusion_common::Result; use datafusion_execution::memory_pool::MemoryReservation; +use datafusion_physical_expr_common::sort_expr::LexOrderingRef; use futures::stream::{Fuse, StreamExt}; use std::marker::PhantomData; use std::sync::Arc; @@ -92,7 +93,7 @@ pub struct 
RowCursorStream { impl RowCursorStream { pub fn try_new( schema: &Schema, - expressions: &[PhysicalSortExpr], + expressions: LexOrderingRef, streams: Vec, reservation: MemoryReservation, ) -> Result { diff --git a/datafusion/physical-plan/src/sorts/streaming_merge.rs b/datafusion/physical-plan/src/sorts/streaming_merge.rs index e8330a7cabc0..bd74685eac94 100644 --- a/datafusion/physical-plan/src/sorts/streaming_merge.rs +++ b/datafusion/physical-plan/src/sorts/streaming_merge.rs @@ -23,11 +23,12 @@ use crate::sorts::{ merge::SortPreservingMergeStream, stream::{FieldCursorStream, RowCursorStream}, }; -use crate::{PhysicalSortExpr, SendableRecordBatchStream}; +use crate::SendableRecordBatchStream; use arrow::datatypes::{DataType, SchemaRef}; use arrow_array::*; use datafusion_common::{internal_err, Result}; use datafusion_execution::memory_pool::MemoryReservation; +use datafusion_physical_expr_common::sort_expr::LexOrderingRef; macro_rules! primitive_merge_helper { ($t:ty, $($v:ident),+) => { @@ -54,7 +55,7 @@ macro_rules! merge_helper { pub struct StreamingMergeBuilder<'a> { streams: Vec, schema: Option, - expressions: &'a [PhysicalSortExpr], + expressions: LexOrderingRef<'a>, metrics: Option, batch_size: Option, fetch: Option, @@ -80,7 +81,7 @@ impl<'a> StreamingMergeBuilder<'a> { self } - pub fn with_expressions(mut self, expressions: &'a [PhysicalSortExpr]) -> Self { + pub fn with_expressions(mut self, expressions: LexOrderingRef<'a>) -> Self { self.expressions = expressions; self } diff --git a/datafusion/physical-plan/src/topk/mod.rs b/datafusion/physical-plan/src/topk/mod.rs index 9b46ad2ec7b1..14469ab6c0d9 100644 --- a/datafusion/physical-plan/src/topk/mod.rs +++ b/datafusion/physical-plan/src/topk/mod.rs @@ -24,6 +24,7 @@ use arrow::{ use std::mem::size_of; use std::{cmp::Ordering, collections::BinaryHeap, sync::Arc}; +use crate::{stream::RecordBatchStreamAdapter, SendableRecordBatchStream}; use arrow_array::{Array, ArrayRef, RecordBatch}; use arrow_schema::SchemaRef; use datafusion_common::Result; @@ -32,10 +33,9 @@ use datafusion_execution::{ runtime_env::RuntimeEnv, }; use datafusion_physical_expr::PhysicalSortExpr; +use datafusion_physical_expr_common::sort_expr::LexOrdering; use hashbrown::HashMap; -use crate::{stream::RecordBatchStreamAdapter, SendableRecordBatchStream}; - use super::metrics::{BaselineMetrics, Count, ExecutionPlanMetricsSet, MetricBuilder}; /// Global TopK @@ -101,7 +101,7 @@ impl TopK { pub fn try_new( partition_id: usize, schema: SchemaRef, - expr: Vec, + expr: LexOrdering, k: usize, batch_size: usize, runtime: Arc, @@ -111,7 +111,7 @@ impl TopK { let reservation = MemoryConsumer::new(format!("TopK[{partition_id}]")) .register(&runtime.memory_pool); - let expr: Arc<[PhysicalSortExpr]> = expr.into(); + let expr: Arc<[PhysicalSortExpr]> = expr.inner.into(); let sort_fields: Vec<_> = expr .iter() diff --git a/datafusion/physical-plan/src/union.rs b/datafusion/physical-plan/src/union.rs index 433dda870def..69cc63f8f65d 100644 --- a/datafusion/physical-plan/src/union.rs +++ b/datafusion/physical-plan/src/union.rs @@ -607,6 +607,7 @@ mod tests { use datafusion_common::ScalarValue; use datafusion_physical_expr::expressions::col; use datafusion_physical_expr::{PhysicalExpr, PhysicalSortExpr}; + use datafusion_physical_expr_common::sort_expr::LexOrdering; // Generate a schema which consists of 7 columns (a, b, c, d, e, f, g) fn create_test_schema() -> Result { @@ -625,14 +626,14 @@ mod tests { // Convert each tuple to PhysicalSortExpr fn convert_to_sort_exprs( 
in_data: &[(&Arc, SortOptions)], - ) -> Vec { + ) -> LexOrdering { in_data .iter() .map(|(expr, options)| PhysicalSortExpr { expr: Arc::clone(*expr), options: *options, }) - .collect::>() + .collect::() } #[tokio::test] diff --git a/datafusion/physical-plan/src/windows/bounded_window_agg_exec.rs b/datafusion/physical-plan/src/windows/bounded_window_agg_exec.rs index 6495657339fa..2c60be49a480 100644 --- a/datafusion/physical-plan/src/windows/bounded_window_agg_exec.rs +++ b/datafusion/physical-plan/src/windows/bounded_window_agg_exec.rs @@ -28,7 +28,6 @@ use std::sync::Arc; use std::task::{Context, Poll}; use super::utils::create_schema; -use crate::expressions::PhysicalSortExpr; use crate::metrics::{BaselineMetrics, ExecutionPlanMetricsSet, MetricsSet}; use crate::windows::{ calc_requirements, get_ordered_partition_by_indices, get_partition_by_sort_exprs, @@ -60,7 +59,7 @@ use datafusion_physical_expr::window::{ PartitionBatches, PartitionKey, PartitionWindowAggStates, WindowState, }; use datafusion_physical_expr::PhysicalExpr; -use datafusion_physical_expr_common::sort_expr::LexRequirement; +use datafusion_physical_expr_common::sort_expr::{LexOrdering, LexRequirement}; use futures::stream::Stream; use futures::{ready, StreamExt}; use hashbrown::raw::RawTable; @@ -149,7 +148,7 @@ impl BoundedWindowAggExec { // We are sure that partition by columns are always at the beginning of sort_keys // Hence returned `PhysicalSortExpr` corresponding to `PARTITION BY` columns can be used safely // to calculate partition separation points - pub fn partition_by_sort_keys(&self) -> Result> { + pub fn partition_by_sort_keys(&self) -> Result { let partition_by = self.window_expr()[0].partition_by(); get_partition_by_sort_exprs( &self.input, @@ -261,7 +260,7 @@ impl ExecutionPlan for BoundedWindowAggExec { .ordered_partition_by_indices .iter() .map(|idx| &partition_bys[*idx]); - vec![calc_requirements(partition_bys, order_keys)] + vec![calc_requirements(partition_bys, order_keys.iter())] } fn required_input_distribution(&self) -> Vec { @@ -707,7 +706,7 @@ impl LinearSearch { /// when computing partitions. pub struct SortedSearch { /// Stores partition by columns and their ordering information - partition_by_sort_keys: Vec, + partition_by_sort_keys: LexOrdering, /// Input ordering and partition by key ordering need not be the same, so /// this vector stores the mapping between them. 
For instance, if the input /// is ordered by a, b and the window expression contains a PARTITION BY b, a @@ -1160,6 +1159,7 @@ mod tests { use std::time::Duration; use crate::common::collect; + use crate::expressions::PhysicalSortExpr; use crate::memory::MemoryExec; use crate::projection::ProjectionExec; use crate::streaming::{PartitionStream, StreamingTableExec}; @@ -1184,8 +1184,9 @@ mod tests { use datafusion_physical_expr::window::{ BuiltInWindowExpr, BuiltInWindowFunctionExpr, }; - use datafusion_physical_expr::{LexOrdering, PhysicalExpr, PhysicalSortExpr}; + use datafusion_physical_expr::{LexOrdering, PhysicalExpr}; + use datafusion_physical_expr_common::sort_expr::LexOrderingRef; use futures::future::Shared; use futures::{pin_mut, ready, FutureExt, Stream, StreamExt}; use itertools::Itertools; @@ -1286,10 +1287,10 @@ mod tests { Arc::new(Column::new(schema.fields[0].name(), 0)) as Arc; let args = vec![col_expr]; let partitionby_exprs = vec![col(hash, &schema)?]; - let orderby_exprs = vec![PhysicalSortExpr { + let orderby_exprs = LexOrdering::new(vec![PhysicalSortExpr { expr: col(order_by, &schema)?, options: SortOptions::default(), - }]; + }]); let window_frame = WindowFrame::new_bounds( WindowFrameUnits::Range, WindowFrameBound::CurrentRow, @@ -1306,7 +1307,7 @@ mod tests { fn_name, &args, &partitionby_exprs, - &orderby_exprs, + orderby_exprs.as_ref(), Arc::new(window_frame), &input.schema(), false, @@ -1403,13 +1404,13 @@ mod tests { } fn schema_orders(schema: &SchemaRef) -> Result> { - let orderings = vec![vec![PhysicalSortExpr { + let orderings = vec![LexOrdering::new(vec![PhysicalSortExpr { expr: col("sn", schema)?, options: SortOptions { descending: false, nulls_first: false, }, - }]]; + }])]; Ok(orderings) } @@ -1552,7 +1553,7 @@ mod tests { Arc::new(BuiltInWindowExpr::new( last_value_func, &[], - &[], + LexOrderingRef::default(), Arc::new(WindowFrame::new_bounds( WindowFrameUnits::Rows, WindowFrameBound::Preceding(ScalarValue::UInt64(None)), @@ -1563,7 +1564,7 @@ mod tests { Arc::new(BuiltInWindowExpr::new( nth_value_func1, &[], - &[], + LexOrderingRef::default(), Arc::new(WindowFrame::new_bounds( WindowFrameUnits::Rows, WindowFrameBound::Preceding(ScalarValue::UInt64(None)), @@ -1574,7 +1575,7 @@ mod tests { Arc::new(BuiltInWindowExpr::new( nth_value_func2, &[], - &[], + LexOrderingRef::default(), Arc::new(WindowFrame::new_bounds( WindowFrameUnits::Rows, WindowFrameBound::Preceding(ScalarValue::UInt64(None)), @@ -1716,8 +1717,8 @@ mod tests { let plan = projection_exec(window)?; let expected_plan = vec![ - "ProjectionExec: expr=[sn@0 as sn, hash@1 as hash, count([Column { name: \"sn\", index: 0 }]) PARTITION BY: [[Column { name: \"hash\", index: 1 }]], ORDER BY: [[PhysicalSortExpr { expr: Column { name: \"sn\", index: 0 }, options: SortOptions { descending: false, nulls_first: true } }]]@2 as col_2]", - " BoundedWindowAggExec: wdw=[count([Column { name: \"sn\", index: 0 }]) PARTITION BY: [[Column { name: \"hash\", index: 1 }]], ORDER BY: [[PhysicalSortExpr { expr: Column { name: \"sn\", index: 0 }, options: SortOptions { descending: false, nulls_first: true } }]]: Ok(Field { name: \"count([Column { name: \\\"sn\\\", index: 0 }]) PARTITION BY: [[Column { name: \\\"hash\\\", index: 1 }]], ORDER BY: [[PhysicalSortExpr { expr: Column { name: \\\"sn\\\", index: 0 }, options: SortOptions { descending: false, nulls_first: true } }]]\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: CurrentRow, 
end_bound: Following(UInt64(1)), is_causal: false }], mode=[Linear]", + "ProjectionExec: expr=[sn@0 as sn, hash@1 as hash, count([Column { name: \"sn\", index: 0 }]) PARTITION BY: [[Column { name: \"hash\", index: 1 }]], ORDER BY: [LexOrdering { inner: [PhysicalSortExpr { expr: Column { name: \"sn\", index: 0 }, options: SortOptions { descending: false, nulls_first: true } }] }]@2 as col_2]", + " BoundedWindowAggExec: wdw=[count([Column { name: \"sn\", index: 0 }]) PARTITION BY: [[Column { name: \"hash\", index: 1 }]], ORDER BY: [LexOrdering { inner: [PhysicalSortExpr { expr: Column { name: \"sn\", index: 0 }, options: SortOptions { descending: false, nulls_first: true } }] }]: Ok(Field { name: \"count([Column { name: \\\"sn\\\", index: 0 }]) PARTITION BY: [[Column { name: \\\"hash\\\", index: 1 }]], ORDER BY: [LexOrdering { inner: [PhysicalSortExpr { expr: Column { name: \\\"sn\\\", index: 0 }, options: SortOptions { descending: false, nulls_first: true } }] }]\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: CurrentRow, end_bound: Following(UInt64(1)), is_causal: false }], mode=[Linear]", " StreamingTableExec: partition_sizes=1, projection=[sn, hash], infinite_source=true, output_ordering=[sn@0 ASC NULLS LAST]", ]; diff --git a/datafusion/physical-plan/src/windows/mod.rs b/datafusion/physical-plan/src/windows/mod.rs index 7ebb7e71ec57..217823fb6a0a 100644 --- a/datafusion/physical-plan/src/windows/mod.rs +++ b/datafusion/physical-plan/src/windows/mod.rs @@ -53,7 +53,7 @@ use datafusion_physical_expr::expressions::Column; pub use datafusion_physical_expr::window::{ BuiltInWindowExpr, PlainAggregateWindowExpr, WindowExpr, }; -use datafusion_physical_expr_common::sort_expr::LexRequirement; +use datafusion_physical_expr_common::sort_expr::{LexOrderingRef, LexRequirement}; pub use window_agg_exec::WindowAggExec; /// Build field from window function and add it into schema @@ -98,7 +98,7 @@ pub fn create_window_expr( name: String, args: &[Arc], partition_by: &[Arc], - order_by: &[PhysicalSortExpr], + order_by: LexOrderingRef, window_frame: Arc, input_schema: &Schema, ignore_nulls: bool, @@ -139,7 +139,7 @@ pub fn create_window_expr( /// Creates an appropriate [`WindowExpr`] based on the window frame and fn window_expr_from_aggregate_expr( partition_by: &[Arc], - order_by: &[PhysicalSortExpr], + order_by: LexOrderingRef, window_frame: Arc, aggregate: Arc, ) -> Arc { @@ -497,7 +497,7 @@ pub fn get_best_fitting_window( /// the mode this window operator should work in to accommodate the existing ordering. pub fn get_window_mode( partitionby_exprs: &[Arc], - orderby_keys: &[PhysicalSortExpr], + orderby_keys: LexOrderingRef, input: &Arc, ) -> Option<(bool, InputOrderMode)> { let input_eqs = input.equivalence_properties().clone(); @@ -516,9 +516,9 @@ pub fn get_window_mode( // Treat partition by exprs as constant. During analysis of requirements are satisfied. 
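    // `orderby_keys` now arrives as a `LexOrderingRef`, so the forward and the
    // reversed sort requirements below are both built by iterating its
    // `PhysicalSortExpr`s. The shape is (sketch, reusing only names already in
    // this function):
    //
    //   let forward  = PhysicalSortRequirement::from_sort_exprs(orderby_keys.iter());
    //   let reversed =
    //       PhysicalSortRequirement::from_sort_exprs(reverse_order_bys(orderby_keys).iter());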
let const_exprs = partitionby_exprs.iter().map(ConstExpr::from); let partition_by_eqs = input_eqs.with_constants(const_exprs); - let order_by_reqs = PhysicalSortRequirement::from_sort_exprs(orderby_keys); + let order_by_reqs = PhysicalSortRequirement::from_sort_exprs(orderby_keys.iter()); let reverse_order_by_reqs = - PhysicalSortRequirement::from_sort_exprs(&reverse_order_bys(orderby_keys)); + PhysicalSortRequirement::from_sort_exprs(reverse_order_bys(orderby_keys).iter()); for (should_swap, order_by_reqs) in [(false, order_by_reqs), (true, reverse_order_by_reqs)] { @@ -699,7 +699,7 @@ mod tests { "count".to_owned(), &[col("a", &schema)?], &[], - &[], + LexOrderingRef::default(), Arc::new(WindowFrame::new(None)), schema.as_ref(), false, @@ -896,7 +896,7 @@ mod tests { partition_by_exprs.push(col(col_name, &test_schema)?); } - let mut order_by_exprs = vec![]; + let mut order_by_exprs = LexOrdering::default(); for col_name in order_by_params { let expr = col(col_name, &test_schema)?; // Give default ordering, this is same with input ordering direction @@ -904,8 +904,11 @@ mod tests { let options = SortOptions::default(); order_by_exprs.push(PhysicalSortExpr { expr, options }); } - let res = - get_window_mode(&partition_by_exprs, &order_by_exprs, &exec_unbounded); + let res = get_window_mode( + &partition_by_exprs, + order_by_exprs.as_ref(), + &exec_unbounded, + ); // Since reversibility is not important in this test. Convert Option<(bool, InputOrderMode)> to Option let res = res.map(|(_, mode)| mode); assert_eq!( @@ -1058,7 +1061,7 @@ mod tests { partition_by_exprs.push(col(col_name, &test_schema)?); } - let mut order_by_exprs = vec![]; + let mut order_by_exprs = LexOrdering::default(); for (col_name, descending, nulls_first) in order_by_params { let expr = col(col_name, &test_schema)?; let options = SortOptions { @@ -1069,7 +1072,7 @@ mod tests { } assert_eq!( - get_window_mode(&partition_by_exprs, &order_by_exprs, &exec_unbounded), + get_window_mode(&partition_by_exprs, order_by_exprs.as_ref(), &exec_unbounded), *expected, "Unexpected result for in unbounded test case#: {case_idx:?}, case: {test_case:?}" ); diff --git a/datafusion/physical-plan/src/windows/window_agg_exec.rs b/datafusion/physical-plan/src/windows/window_agg_exec.rs index afe9700ed08c..1318f36f269e 100644 --- a/datafusion/physical-plan/src/windows/window_agg_exec.rs +++ b/datafusion/physical-plan/src/windows/window_agg_exec.rs @@ -23,7 +23,6 @@ use std::sync::Arc; use std::task::{Context, Poll}; use super::utils::create_schema; -use crate::expressions::PhysicalSortExpr; use crate::metrics::{BaselineMetrics, ExecutionPlanMetricsSet, MetricsSet}; use crate::windows::{ calc_requirements, get_ordered_partition_by_indices, get_partition_by_sort_exprs, @@ -43,7 +42,7 @@ use datafusion_common::stats::Precision; use datafusion_common::utils::{evaluate_partition_ranges, transpose}; use datafusion_common::{internal_err, Result}; use datafusion_execution::TaskContext; -use datafusion_physical_expr_common::sort_expr::LexRequirement; +use datafusion_physical_expr_common::sort_expr::{LexOrdering, LexRequirement}; use futures::{ready, Stream, StreamExt}; /// Window execution plan @@ -105,7 +104,7 @@ impl WindowAggExec { // We are sure that partition by columns are always at the beginning of sort_keys // Hence returned `PhysicalSortExpr` corresponding to `PARTITION BY` columns can be used safely // to calculate partition separation points - pub fn partition_by_sort_keys(&self) -> Result> { + pub fn partition_by_sort_keys(&self) -> Result 
{ let partition_by = self.window_expr()[0].partition_by(); get_partition_by_sort_exprs( &self.input, @@ -195,13 +194,13 @@ impl ExecutionPlan for WindowAggExec { let partition_bys = self.window_expr()[0].partition_by(); let order_keys = self.window_expr()[0].order_by(); if self.ordered_partition_by_indices.len() < partition_bys.len() { - vec![calc_requirements(partition_bys, order_keys)] + vec![calc_requirements(partition_bys, order_keys.iter())] } else { let partition_bys = self .ordered_partition_by_indices .iter() .map(|idx| &partition_bys[*idx]); - vec![calc_requirements(partition_bys, order_keys)] + vec![calc_requirements(partition_bys, order_keys.iter())] } } @@ -282,7 +281,7 @@ pub struct WindowAggStream { batches: Vec, finished: bool, window_expr: Vec>, - partition_by_sort_keys: Vec, + partition_by_sort_keys: LexOrdering, baseline_metrics: BaselineMetrics, ordered_partition_by_indices: Vec, } @@ -294,7 +293,7 @@ impl WindowAggStream { window_expr: Vec>, input: SendableRecordBatchStream, baseline_metrics: BaselineMetrics, - partition_by_sort_keys: Vec, + partition_by_sort_keys: LexOrdering, ordered_partition_by_indices: Vec, ) -> Result { // In WindowAggExec all partition by columns should be ordered. diff --git a/datafusion/proto/src/physical_plan/from_proto.rs b/datafusion/proto/src/physical_plan/from_proto.rs index 20ec5eeaeaf8..316166042fc4 100644 --- a/datafusion/proto/src/physical_plan/from_proto.rs +++ b/datafusion/proto/src/physical_plan/from_proto.rs @@ -35,7 +35,7 @@ use datafusion::datasource::object_store::ObjectStoreUrl; use datafusion::datasource::physical_plan::{FileScanConfig, FileSinkConfig}; use datafusion::execution::FunctionRegistry; use datafusion::logical_expr::WindowFunctionDefinition; -use datafusion::physical_expr::{PhysicalSortExpr, ScalarFunctionExpr}; +use datafusion::physical_expr::{LexOrdering, PhysicalSortExpr, ScalarFunctionExpr}; use datafusion::physical_plan::expressions::{ in_list, BinaryExpr, CaseExpr, CastExpr, Column, IsNotNullExpr, IsNullExpr, LikeExpr, Literal, NegativeExpr, NotExpr, TryCastExpr, @@ -99,13 +99,13 @@ pub fn parse_physical_sort_exprs( registry: &dyn FunctionRegistry, input_schema: &Schema, codec: &dyn PhysicalExtensionCodec, -) -> Result> { +) -> Result { proto .iter() .map(|sort_expr| { parse_physical_sort_expr(sort_expr, registry, input_schema, codec) }) - .collect::>>() + .collect::>() } /// Parses a physical window expr from a protobuf. 
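The hunk above changes `parse_physical_sort_exprs` to collect the parsed expressions directly into a `LexOrdering` rather than a `Vec<PhysicalSortExpr>`, relying on `LexOrdering` being buildable from an iterator of sort expressions. A minimal sketch of the same pattern outside the proto code, assuming only the `LexOrdering`, `PhysicalSortExpr` and `col` APIs already used elsewhere in this patch (the `example_ordering` helper, its schema parameter, and the column names are illustrative placeholders):

use arrow_schema::{Schema, SortOptions};
use datafusion_common::Result;
use datafusion_physical_expr::expressions::col;
use datafusion_physical_expr_common::sort_expr::{LexOrdering, PhysicalSortExpr};

/// Build a two-key lexicographic ordering by collecting sort expressions,
/// mirroring the `.collect::<Result<LexOrdering>>()` call in the hunk above.
fn example_ordering(schema: &Schema) -> Result<LexOrdering> {
    ["a", "b"]
        .into_iter()
        .map(|name| {
            Ok(PhysicalSortExpr {
                expr: col(name, schema)?,
                options: SortOptions::default(),
            })
        })
        .collect::<Result<LexOrdering>>()
}

For a schema that actually contains columns `a` and `b`, this yields the same value the tests in this patch spell out explicitly as `LexOrdering::new(vec![...])`.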
@@ -175,7 +175,7 @@ pub fn parse_physical_window_expr( name, &window_node_expr, &partition_by, - &order_by, + order_by.as_ref(), Arc::new(window_frame), &extended_schema, false, diff --git a/datafusion/proto/src/physical_plan/mod.rs b/datafusion/proto/src/physical_plan/mod.rs index 326c7acab392..e84eae2b9082 100644 --- a/datafusion/proto/src/physical_plan/mod.rs +++ b/datafusion/proto/src/physical_plan/mod.rs @@ -35,7 +35,7 @@ use datafusion::datasource::physical_plan::{AvroExec, CsvExec}; use datafusion::execution::runtime_env::RuntimeEnv; use datafusion::execution::FunctionRegistry; use datafusion::physical_expr::aggregate::AggregateFunctionExpr; -use datafusion::physical_expr::{PhysicalExprRef, PhysicalSortRequirement}; +use datafusion::physical_expr::{LexOrdering, PhysicalExprRef, PhysicalSortRequirement}; use datafusion::physical_plan::aggregates::AggregateMode; use datafusion::physical_plan::aggregates::{AggregateExec, PhysicalGroupBy}; use datafusion::physical_plan::analyze::AnalyzeExec; @@ -501,8 +501,9 @@ impl AsExecutionPlan for protobuf::PhysicalPlanNode { ExprType::AggregateExpr(agg_node) => { let input_phy_expr: Vec> = agg_node.expr.iter() .map(|e| parse_physical_expr(e, registry, &physical_schema, extension_codec)).collect::>>()?; - let ordering_req: Vec = agg_node.ordering_req.iter() - .map(|e| parse_physical_sort_expr(e, registry, &physical_schema, extension_codec)).collect::>>()?; + let ordering_req: LexOrdering = agg_node.ordering_req.iter() + .map(|e| parse_physical_sort_expr(e, registry, &physical_schema, extension_codec)) + .collect::>()?; agg_node.aggregate_function.as_ref().map(|func| { match func { AggregateFunction::UserDefinedAggrFunction(udaf_name) => { @@ -874,7 +875,7 @@ impl AsExecutionPlan for protobuf::PhysicalPlanNode { ) } }) - .collect::, _>>()?; + .collect::>()?; let fetch = if sort.fetch < 0 { None } else { @@ -921,7 +922,7 @@ impl AsExecutionPlan for protobuf::PhysicalPlanNode { ) } }) - .collect::, _>>()?; + .collect::>()?; let fetch = if sort.fetch < 0 { None } else { @@ -1036,7 +1037,7 @@ impl AsExecutionPlan for protobuf::PhysicalPlanNode { &sink_schema, extension_codec, ) - .map(|item| PhysicalSortRequirement::from_sort_exprs(&item)) + .map(|item| PhysicalSortRequirement::from_sort_exprs(&item.inner)) }) .transpose()?; Ok(Arc::new(DataSinkExec::new( @@ -1066,7 +1067,7 @@ impl AsExecutionPlan for protobuf::PhysicalPlanNode { &sink_schema, extension_codec, ) - .map(|item| PhysicalSortRequirement::from_sort_exprs(&item)) + .map(|item| PhysicalSortRequirement::from_sort_exprs(&item.inner)) }) .transpose()?; Ok(Arc::new(DataSinkExec::new( @@ -1103,7 +1104,9 @@ impl AsExecutionPlan for protobuf::PhysicalPlanNode { &sink_schema, extension_codec, ) - .map(|item| PhysicalSortRequirement::from_sort_exprs(&item)) + .map(|item| { + PhysicalSortRequirement::from_sort_exprs(&item.inner) + }) }) .transpose()?; Ok(Arc::new(DataSinkExec::new( diff --git a/datafusion/proto/src/physical_plan/to_proto.rs b/datafusion/proto/src/physical_plan/to_proto.rs index 89a2403922e9..4bf7e353326e 100644 --- a/datafusion/proto/src/physical_plan/to_proto.rs +++ b/datafusion/proto/src/physical_plan/to_proto.rs @@ -21,7 +21,7 @@ use std::sync::Arc; #[cfg(feature = "parquet")] use datafusion::datasource::file_format::parquet::ParquetSink; use datafusion::physical_expr::window::{NthValueKind, SlidingAggregateWindowExpr}; -use datafusion::physical_expr::{PhysicalSortExpr, ScalarFunctionExpr}; +use datafusion::physical_expr::{LexOrdering, PhysicalSortExpr, ScalarFunctionExpr}; use 
datafusion::physical_plan::expressions::{ BinaryExpr, CaseExpr, CastExpr, Column, InListExpr, IsNotNullExpr, IsNullExpr, Literal, NegativeExpr, NotExpr, NthValue, TryCastExpr, @@ -52,7 +52,10 @@ pub fn serialize_physical_aggr_expr( codec: &dyn PhysicalExtensionCodec, ) -> Result { let expressions = serialize_physical_exprs(&aggr_expr.expressions(), codec)?; - let ordering_req = aggr_expr.order_bys().unwrap_or(&[]).to_vec(); + let ordering_req = match aggr_expr.order_bys() { + Some(order) => LexOrdering::from_ref(order), + None => LexOrdering::default(), + }; let ordering_req = serialize_physical_sort_exprs(ordering_req, codec)?; let name = aggr_expr.fun().name().to_string(); diff --git a/datafusion/proto/tests/cases/roundtrip_physical_plan.rs b/datafusion/proto/tests/cases/roundtrip_physical_plan.rs index 4a9bf6afb49e..1e078ee410c6 100644 --- a/datafusion/proto/tests/cases/roundtrip_physical_plan.rs +++ b/datafusion/proto/tests/cases/roundtrip_physical_plan.rs @@ -52,7 +52,8 @@ use datafusion::logical_expr::{create_udf, JoinType, Operator, Volatility}; use datafusion::physical_expr::expressions::Literal; use datafusion::physical_expr::window::SlidingAggregateWindowExpr; use datafusion::physical_expr::{ - LexRequirement, PhysicalSortRequirement, ScalarFunctionExpr, + LexOrdering, LexOrderingRef, LexRequirement, PhysicalSortRequirement, + ScalarFunctionExpr, }; use datafusion::physical_plan::aggregates::{ AggregateExec, AggregateMode, PhysicalGroupBy, @@ -307,7 +308,7 @@ fn roundtrip_window() -> Result<()> { .build() .map(Arc::new)?, &[], - &[], + LexOrderingRef::default(), Arc::new(WindowFrame::new(None)), )); @@ -327,7 +328,7 @@ fn roundtrip_window() -> Result<()> { let sliding_aggr_window_expr = Arc::new(SlidingAggregateWindowExpr::new( sum_expr, &[], - &[], + LexOrderingRef::default(), Arc::new(window_frame), )); @@ -459,13 +460,13 @@ fn rountrip_aggregate_with_sort() -> Result<()> { let groups: Vec<(Arc, String)> = vec![(col("a", &schema)?, "unused".to_string())]; - let sort_exprs = vec![PhysicalSortExpr { + let sort_exprs = LexOrdering::new(vec![PhysicalSortExpr { expr: col("b", &schema)?, options: SortOptions { descending: false, nulls_first: true, }, - }]; + }]); let aggregates = vec![ @@ -585,7 +586,7 @@ fn roundtrip_sort() -> Result<()> { let field_a = Field::new("a", DataType::Boolean, false); let field_b = Field::new("b", DataType::Int64, false); let schema = Arc::new(Schema::new(vec![field_a, field_b])); - let sort_exprs = vec![ + let sort_exprs = LexOrdering::new(vec![ PhysicalSortExpr { expr: col("a", &schema)?, options: SortOptions { @@ -600,7 +601,7 @@ fn roundtrip_sort() -> Result<()> { nulls_first: true, }, }, - ]; + ]); roundtrip_test(Arc::new(SortExec::new( sort_exprs, Arc::new(EmptyExec::new(schema)), @@ -612,7 +613,7 @@ fn roundtrip_sort_preserve_partitioning() -> Result<()> { let field_a = Field::new("a", DataType::Boolean, false); let field_b = Field::new("b", DataType::Int64, false); let schema = Arc::new(Schema::new(vec![field_a, field_b])); - let sort_exprs = vec![ + let sort_exprs = LexOrdering::new(vec![ PhysicalSortExpr { expr: col("a", &schema)?, options: SortOptions { @@ -627,7 +628,7 @@ fn roundtrip_sort_preserve_partitioning() -> Result<()> { nulls_first: true, }, }, - ]; + ]); roundtrip_test(Arc::new(SortExec::new( sort_exprs.clone(), @@ -1013,7 +1014,7 @@ fn roundtrip_scalar_udf_extension_codec() -> Result<()> { vec![Arc::new(PlainAggregateWindowExpr::new( aggr_expr.clone(), &[col("author", &schema)?], - &[], + LexOrderingRef::default(), 
Arc::new(WindowFrame::new(None)), ))], filter, @@ -1074,7 +1075,7 @@ fn roundtrip_aggregate_udf_extension_codec() -> Result<()> { vec![Arc::new(PlainAggregateWindowExpr::new( aggr_expr, &[col("author", &schema)?], - &[], + LexOrderingRef::default(), Arc::new(WindowFrame::new(None)), ))], filter, @@ -1298,17 +1299,17 @@ fn roundtrip_sym_hash_join() -> Result<()> { ] { for left_order in &[ None, - Some(vec![PhysicalSortExpr { + Some(LexOrdering::new(vec![PhysicalSortExpr { expr: Arc::new(Column::new("col", schema_left.index_of("col")?)), options: Default::default(), - }]), + }])), ] { for right_order in &[ None, - Some(vec![PhysicalSortExpr { + Some(LexOrdering::new(vec![PhysicalSortExpr { expr: Arc::new(Column::new("col", schema_right.index_of("col")?)), options: Default::default(), - }]), + }])), ] { roundtrip_test(Arc::new( datafusion::physical_plan::joins::SymmetricHashJoinExec::try_new( diff --git a/datafusion/sqllogictest/test_files/aggregate.slt b/datafusion/sqllogictest/test_files/aggregate.slt index aac315ea0efd..917e037682f2 100644 --- a/datafusion/sqllogictest/test_files/aggregate.slt +++ b/datafusion/sqllogictest/test_files/aggregate.slt @@ -146,7 +146,7 @@ physical_plan 01)AggregateExec: mode=Final, gby=[], aggr=[array_agg(agg_order.c1) ORDER BY [agg_order.c2 DESC NULLS FIRST, agg_order.c3 ASC NULLS LAST]] 02)--CoalescePartitionsExec 03)----AggregateExec: mode=Partial, gby=[], aggr=[array_agg(agg_order.c1) ORDER BY [agg_order.c2 DESC NULLS FIRST, agg_order.c3 ASC NULLS LAST]] -04)------SortExec: expr=[c2@1 DESC,c3@2 ASC NULLS LAST], preserve_partitioning=[true] +04)------SortExec: expr=[c2@1 DESC, c3@2 ASC NULLS LAST], preserve_partitioning=[true] 05)--------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 06)----------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/aggregate_agg_multi_order.csv]]}, projection=[c1, c2, c3], has_header=true diff --git a/datafusion/sqllogictest/test_files/cte.slt b/datafusion/sqllogictest/test_files/cte.slt index 60569803322c..53ca8d81b9e4 100644 --- a/datafusion/sqllogictest/test_files/cte.slt +++ b/datafusion/sqllogictest/test_files/cte.slt @@ -158,7 +158,7 @@ logical_plan 07)--------Filter: balances.time < Int64(10) 08)----------TableScan: balances physical_plan -01)SortExec: expr=[time@0 ASC NULLS LAST,name@1 ASC NULLS LAST,account_balance@2 ASC NULLS LAST], preserve_partitioning=[false] +01)SortExec: expr=[time@0 ASC NULLS LAST, name@1 ASC NULLS LAST, account_balance@2 ASC NULLS LAST], preserve_partitioning=[false] 02)--RecursiveQueryExec: name=balances, is_distinct=false 03)----CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/recursive_cte/balance.csv]]}, projection=[time, name, account_balance], has_header=true 04)----CoalescePartitionsExec diff --git a/datafusion/sqllogictest/test_files/filter_without_sort_exec.slt b/datafusion/sqllogictest/test_files/filter_without_sort_exec.slt index 3b4deae3326f..d96044fda8c0 100644 --- a/datafusion/sqllogictest/test_files/filter_without_sort_exec.slt +++ b/datafusion/sqllogictest/test_files/filter_without_sort_exec.slt @@ -37,7 +37,7 @@ logical_plan 02)--Filter: data.ticker = Utf8("A") 03)----TableScan: data projection=[date, ticker, time] physical_plan -01)SortPreservingMergeExec: [date@0 ASC NULLS LAST,time@2 ASC NULLS LAST] +01)SortPreservingMergeExec: [date@0 ASC NULLS LAST, time@2 ASC NULLS LAST] 02)--CoalesceBatchesExec: target_batch_size=8192 03)----FilterExec: ticker@1 = A 04)------RepartitionExec: 
partitioning=RoundRobinBatch(4), input_partitions=1 @@ -105,7 +105,7 @@ logical_plan 02)--Filter: data.ticker = Utf8("A") AND CAST(data.time AS Date32) = data.date 03)----TableScan: data projection=[date, ticker, time] physical_plan -01)SortPreservingMergeExec: [time@2 ASC NULLS LAST,date@0 ASC NULLS LAST] +01)SortPreservingMergeExec: [time@2 ASC NULLS LAST, date@0 ASC NULLS LAST] 02)--CoalesceBatchesExec: target_batch_size=8192 03)----FilterExec: ticker@1 = A AND CAST(time@2 AS Date32) = date@0 04)------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 @@ -146,7 +146,7 @@ logical_plan 02)--Filter: data.date = Date32("2006-01-02") 03)----TableScan: data projection=[date, ticker, time] physical_plan -01)SortPreservingMergeExec: [ticker@1 ASC NULLS LAST,time@2 ASC NULLS LAST] +01)SortPreservingMergeExec: [ticker@1 ASC NULLS LAST, time@2 ASC NULLS LAST] 02)--CoalesceBatchesExec: target_batch_size=8192 03)----FilterExec: date@0 = 2006-01-02 04)------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 diff --git a/datafusion/sqllogictest/test_files/group_by.slt b/datafusion/sqllogictest/test_files/group_by.slt index 61b3ad73cd0a..daf270190870 100644 --- a/datafusion/sqllogictest/test_files/group_by.slt +++ b/datafusion/sqllogictest/test_files/group_by.slt @@ -2250,7 +2250,7 @@ logical_plan 01)Sort: annotated_data_infinite2.a ASC NULLS LAST, annotated_data_infinite2.b ASC NULLS LAST, annotated_data_infinite2.d ASC NULLS LAST 02)--TableScan: annotated_data_infinite2 projection=[a0, a, b, c, d] physical_plan -01)PartialSortExec: expr=[a@1 ASC NULLS LAST,b@2 ASC NULLS LAST,d@4 ASC NULLS LAST], common_prefix_length=[2] +01)PartialSortExec: expr=[a@1 ASC NULLS LAST, b@2 ASC NULLS LAST, d@4 ASC NULLS LAST], common_prefix_length=[2] 02)--StreamingTableExec: partition_sizes=1, projection=[a0, a, b, c, d], infinite_source=true, output_ordering=[a@1 ASC NULLS LAST, b@2 ASC NULLS LAST, c@3 ASC NULLS LAST] query TT @@ -2263,7 +2263,7 @@ logical_plan 01)Sort: annotated_data_infinite2.a ASC NULLS LAST, annotated_data_infinite2.b ASC NULLS LAST, annotated_data_infinite2.d ASC NULLS LAST, fetch=50 02)--TableScan: annotated_data_infinite2 projection=[a0, a, b, c, d] physical_plan -01)PartialSortExec: TopK(fetch=50), expr=[a@1 ASC NULLS LAST,b@2 ASC NULLS LAST,d@4 ASC NULLS LAST], common_prefix_length=[2] +01)PartialSortExec: TopK(fetch=50), expr=[a@1 ASC NULLS LAST, b@2 ASC NULLS LAST, d@4 ASC NULLS LAST], common_prefix_length=[2] 02)--StreamingTableExec: partition_sizes=1, projection=[a0, a, b, c, d], infinite_source=true, output_ordering=[a@1 ASC NULLS LAST, b@2 ASC NULLS LAST, c@3 ASC NULLS LAST] query TT @@ -2275,7 +2275,7 @@ logical_plan 01)Sort: multiple_ordered_table.a ASC NULLS LAST, multiple_ordered_table.b ASC NULLS LAST, multiple_ordered_table.d ASC NULLS LAST 02)--TableScan: multiple_ordered_table projection=[a0, a, b, c, d] physical_plan -01)SortExec: expr=[a@1 ASC NULLS LAST,b@2 ASC NULLS LAST,d@4 ASC NULLS LAST], preserve_partitioning=[false] +01)SortExec: expr=[a@1 ASC NULLS LAST, b@2 ASC NULLS LAST, d@4 ASC NULLS LAST], preserve_partitioning=[false] 02)--CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a0, a, b, c, d], output_orderings=[[a@1 ASC NULLS LAST, b@2 ASC NULLS LAST], [c@3 ASC NULLS LAST]], has_header=true query TT @@ -2288,7 +2288,7 @@ logical_plan 02)--TableScan: annotated_data_infinite2 projection=[a, b, d] physical_plan 01)AggregateExec: mode=Single, gby=[a@0 as a, b@1 as b], 
aggr=[array_agg(annotated_data_infinite2.d) ORDER BY [annotated_data_infinite2.d ASC NULLS LAST]], ordering_mode=Sorted -02)--PartialSortExec: expr=[a@0 ASC NULLS LAST,b@1 ASC NULLS LAST,d@2 ASC NULLS LAST], common_prefix_length=[2] +02)--PartialSortExec: expr=[a@0 ASC NULLS LAST, b@1 ASC NULLS LAST, d@2 ASC NULLS LAST], common_prefix_length=[2] 03)----StreamingTableExec: partition_sizes=1, projection=[a, b, d], infinite_source=true, output_ordering=[a@0 ASC NULLS LAST, b@1 ASC NULLS LAST] # as can be seen in the result below d is indeed ordered. @@ -2535,7 +2535,7 @@ logical_plan physical_plan 01)ProjectionExec: expr=[country@0 as country, array_agg(s.amount) ORDER BY [s.amount DESC NULLS FIRST]@1 as amounts, sum(s.amount)@2 as sum1] 02)--AggregateExec: mode=Single, gby=[country@0 as country], aggr=[array_agg(s.amount) ORDER BY [s.amount DESC NULLS FIRST], sum(s.amount)], ordering_mode=Sorted -03)----SortExec: TopK(fetch=10), expr=[country@0 ASC NULLS LAST,amount@1 DESC], preserve_partitioning=[false] +03)----SortExec: TopK(fetch=10), expr=[country@0 ASC NULLS LAST, amount@1 DESC], preserve_partitioning=[false] 04)------MemoryExec: partitions=1, partition_sizes=[1] @@ -2573,7 +2573,7 @@ logical_plan physical_plan 01)ProjectionExec: expr=[country@0 as country, zip_code@1 as zip_code, array_agg(s.amount) ORDER BY [s.amount DESC NULLS FIRST]@2 as amounts, sum(s.amount)@3 as sum1] 02)--AggregateExec: mode=Single, gby=[country@1 as country, zip_code@0 as zip_code], aggr=[array_agg(s.amount) ORDER BY [s.amount DESC NULLS FIRST], sum(s.amount)], ordering_mode=PartiallySorted([0]) -03)----SortExec: TopK(fetch=10), expr=[country@1 ASC NULLS LAST,amount@2 DESC], preserve_partitioning=[false] +03)----SortExec: TopK(fetch=10), expr=[country@1 ASC NULLS LAST, amount@2 DESC], preserve_partitioning=[false] 04)------MemoryExec: partitions=1, partition_sizes=[1] query TI?R rowsort @@ -2646,7 +2646,7 @@ logical_plan physical_plan 01)ProjectionExec: expr=[country@0 as country, array_agg(s.amount) ORDER BY [s.country DESC NULLS FIRST, s.amount DESC NULLS FIRST]@1 as amounts, sum(s.amount)@2 as sum1] 02)--AggregateExec: mode=Single, gby=[country@0 as country], aggr=[array_agg(s.amount) ORDER BY [s.country DESC NULLS FIRST, s.amount DESC NULLS FIRST], sum(s.amount)], ordering_mode=Sorted -03)----SortExec: TopK(fetch=10), expr=[country@0 ASC NULLS LAST,amount@1 DESC], preserve_partitioning=[false] +03)----SortExec: TopK(fetch=10), expr=[country@0 ASC NULLS LAST, amount@1 DESC], preserve_partitioning=[false] 04)------MemoryExec: partitions=1, partition_sizes=[1] @@ -4328,7 +4328,7 @@ logical_plan 02)--Projection: unbounded_csv_with_timestamps2.name, date_bin(IntervalMonthDayNano("IntervalMonthDayNano { months: 0, days: 0, nanoseconds: 900000000000 }"), unbounded_csv_with_timestamps2.ts) AS time_chunks 03)----TableScan: unbounded_csv_with_timestamps2 projection=[name, ts] physical_plan -01)SortPreservingMergeExec: [name@0 DESC,time_chunks@1 DESC], fetch=5 +01)SortPreservingMergeExec: [name@0 DESC, time_chunks@1 DESC], fetch=5 02)--ProjectionExec: expr=[name@0 as name, date_bin(IntervalMonthDayNano { months: 0, days: 0, nanoseconds: 900000000000 }, ts@1) as time_chunks] 03)----RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1 04)------StreamingTableExec: partition_sizes=1, projection=[name, ts], infinite_source=true, output_ordering=[name@0 DESC, ts@1 DESC] diff --git a/datafusion/sqllogictest/test_files/insert.slt b/datafusion/sqllogictest/test_files/insert.slt index 230ea4d98fc3..804612287246 
100644 --- a/datafusion/sqllogictest/test_files/insert.slt +++ b/datafusion/sqllogictest/test_files/insert.slt @@ -69,7 +69,7 @@ physical_plan 03)----SortPreservingMergeExec: [c1@2 ASC NULLS LAST] 04)------ProjectionExec: expr=[sum(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING@3 as sum(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING, count(*) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING@4 as count(*) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING, c1@0 as c1] 05)--------BoundedWindowAggExec: wdw=[sum(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "sum(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(1)), is_causal: false }, count(*) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "count(*) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(1)), is_causal: false }], mode=[Sorted] -06)----------SortExec: expr=[c1@0 ASC NULLS LAST,c9@2 ASC NULLS LAST], preserve_partitioning=[true] +06)----------SortExec: expr=[c1@0 ASC NULLS LAST, c9@2 ASC NULLS LAST], preserve_partitioning=[true] 07)------------CoalesceBatchesExec: target_batch_size=8192 08)--------------RepartitionExec: partitioning=Hash([c1@0], 8), input_partitions=8 09)----------------RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1 @@ -129,7 +129,7 @@ physical_plan 02)--CoalescePartitionsExec 03)----ProjectionExec: expr=[sum(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING@3 as field1, count(*) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING@4 as field2] 04)------BoundedWindowAggExec: wdw=[sum(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "sum(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(1)), is_causal: false }, count(*) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "count(*) PARTITION BY [aggregate_test_100.c1] ORDER BY 
[aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(1)), is_causal: false }], mode=[Sorted] -05)--------SortExec: expr=[c1@0 ASC NULLS LAST,c9@2 ASC NULLS LAST], preserve_partitioning=[true] +05)--------SortExec: expr=[c1@0 ASC NULLS LAST, c9@2 ASC NULLS LAST], preserve_partitioning=[true] 06)----------CoalesceBatchesExec: target_batch_size=8192 07)------------RepartitionExec: partitioning=Hash([c1@0], 8), input_partitions=8 08)--------------RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1 @@ -180,7 +180,7 @@ physical_plan 03)----SortPreservingMergeExec: [c1@2 ASC NULLS LAST] 04)------ProjectionExec: expr=[sum(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING@3 as a1, count(*) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING@4 as a2, c1@0 as c1] 05)--------BoundedWindowAggExec: wdw=[sum(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "sum(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(1)), is_causal: false }, count(*) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "count(*) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(1)), is_causal: false }], mode=[Sorted] -06)----------SortExec: expr=[c1@0 ASC NULLS LAST,c9@2 ASC NULLS LAST], preserve_partitioning=[true] +06)----------SortExec: expr=[c1@0 ASC NULLS LAST, c9@2 ASC NULLS LAST], preserve_partitioning=[true] 07)------------CoalesceBatchesExec: target_batch_size=8192 08)--------------RepartitionExec: partitioning=Hash([c1@0], 8), input_partitions=8 09)----------------RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1 diff --git a/datafusion/sqllogictest/test_files/insert_to_external.slt b/datafusion/sqllogictest/test_files/insert_to_external.slt index c40f62c3ba80..35decd728eed 100644 --- a/datafusion/sqllogictest/test_files/insert_to_external.slt +++ b/datafusion/sqllogictest/test_files/insert_to_external.slt @@ -126,7 +126,7 @@ logical_plan 03)----Values: (Int64(5), Int64(1)), (Int64(4), Int64(2)), (Int64(7), Int64(7)), (Int64(7), Int64(8)), (Int64(7), Int64(9))... 
physical_plan 01)DataSinkExec: sink=CsvSink(file_groups=[]) -02)--SortExec: expr=[a@0 ASC NULLS LAST,b@1 DESC], preserve_partitioning=[false] +02)--SortExec: expr=[a@0 ASC NULLS LAST, b@1 DESC], preserve_partitioning=[false] 03)----ProjectionExec: expr=[column1@0 as a, column2@1 as b] 04)------ValuesExec @@ -358,7 +358,7 @@ physical_plan 03)----SortPreservingMergeExec: [c1@2 ASC NULLS LAST] 04)------ProjectionExec: expr=[sum(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING@3 as sum(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING, count(*) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING@4 as count(*) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING, c1@0 as c1] 05)--------BoundedWindowAggExec: wdw=[sum(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "sum(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(1)), is_causal: false }, count(*) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "count(*) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(1)), is_causal: false }], mode=[Sorted] -06)----------SortExec: expr=[c1@0 ASC NULLS LAST,c9@2 ASC NULLS LAST], preserve_partitioning=[true] +06)----------SortExec: expr=[c1@0 ASC NULLS LAST, c9@2 ASC NULLS LAST], preserve_partitioning=[true] 07)------------CoalesceBatchesExec: target_batch_size=8192 08)--------------RepartitionExec: partitioning=Hash([c1@0], 8), input_partitions=8 09)----------------RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1 @@ -419,7 +419,7 @@ physical_plan 02)--CoalescePartitionsExec 03)----ProjectionExec: expr=[sum(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING@3 as field1, count(*) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING@4 as field2] 04)------BoundedWindowAggExec: wdw=[sum(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "sum(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(1)), is_causal: false }, count(*) PARTITION BY 
[aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "count(*) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(1)), is_causal: false }], mode=[Sorted] -05)--------SortExec: expr=[c1@0 ASC NULLS LAST,c9@2 ASC NULLS LAST], preserve_partitioning=[true] +05)--------SortExec: expr=[c1@0 ASC NULLS LAST, c9@2 ASC NULLS LAST], preserve_partitioning=[true] 06)----------CoalesceBatchesExec: target_batch_size=8192 07)------------RepartitionExec: partitioning=Hash([c1@0], 8), input_partitions=8 08)--------------RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1 diff --git a/datafusion/sqllogictest/test_files/join_disable_repartition_joins.slt b/datafusion/sqllogictest/test_files/join_disable_repartition_joins.slt index eb47ccdd43ec..44c5ed0d2dab 100644 --- a/datafusion/sqllogictest/test_files/join_disable_repartition_joins.slt +++ b/datafusion/sqllogictest/test_files/join_disable_repartition_joins.slt @@ -95,7 +95,7 @@ logical_plan 08)--------Filter: annotated_data.d = Int32(3) 09)----------TableScan: annotated_data projection=[a, b, c, d], partial_filters=[annotated_data.d = Int32(3)] physical_plan -01)SortPreservingMergeExec: [a2@0 ASC NULLS LAST,b@1 ASC NULLS LAST], fetch=10 +01)SortPreservingMergeExec: [a2@0 ASC NULLS LAST, b@1 ASC NULLS LAST], fetch=10 02)--ProjectionExec: expr=[a@0 as a2, b@1 as b] 03)----CoalesceBatchesExec: target_batch_size=8192, fetch=10 04)------HashJoinExec: mode=CollectLeft, join_type=RightSemi, on=[(d@1, d@3), (c@0, c@2)], projection=[a@0, b@1] diff --git a/datafusion/sqllogictest/test_files/joins.slt b/datafusion/sqllogictest/test_files/joins.slt index bc40f845cc8a..93bb1f1f548e 100644 --- a/datafusion/sqllogictest/test_files/joins.slt +++ b/datafusion/sqllogictest/test_files/joins.slt @@ -3243,13 +3243,13 @@ physical_plan 01)SortPreservingMergeExec: [rn1@5 ASC NULLS LAST] 02)--SortMergeJoin: join_type=Inner, on=[(a@1, a@1)] 03)----CoalesceBatchesExec: target_batch_size=2 -04)------RepartitionExec: partitioning=Hash([a@1], 2), input_partitions=2, preserve_order=true, sort_exprs=a@1 ASC,b@2 ASC NULLS LAST,c@3 ASC NULLS LAST,rn1@5 ASC NULLS LAST +04)------RepartitionExec: partitioning=Hash([a@1], 2), input_partitions=2, preserve_order=true, sort_exprs=a@1 ASC, b@2 ASC NULLS LAST, c@3 ASC NULLS LAST, rn1@5 ASC NULLS LAST 05)--------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1 06)----------ProjectionExec: expr=[a0@0 as a0, a@1 as a, b@2 as b, c@3 as c, d@4 as d, row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING@5 as rn1] 07)------------BoundedWindowAggExec: wdw=[row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }], mode=[Sorted] 08)--------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a0, a, b, c, d], output_ordering=[a@1 ASC, b@2 ASC NULLS LAST, c@3 ASC NULLS LAST], has_header=true 
09)----CoalesceBatchesExec: target_batch_size=2 -10)------RepartitionExec: partitioning=Hash([a@1], 2), input_partitions=2, preserve_order=true, sort_exprs=a@1 ASC,b@2 ASC NULLS LAST,c@3 ASC NULLS LAST +10)------RepartitionExec: partitioning=Hash([a@1], 2), input_partitions=2, preserve_order=true, sort_exprs=a@1 ASC, b@2 ASC NULLS LAST, c@3 ASC NULLS LAST 11)--------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1 12)----------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a0, a, b, c, d], output_ordering=[a@1 ASC, b@2 ASC NULLS LAST, c@3 ASC NULLS LAST], has_header=true @@ -3277,11 +3277,11 @@ physical_plan 01)SortPreservingMergeExec: [rn1@10 ASC NULLS LAST] 02)--SortMergeJoin: join_type=Right, on=[(a@1, a@1)] 03)----CoalesceBatchesExec: target_batch_size=2 -04)------RepartitionExec: partitioning=Hash([a@1], 2), input_partitions=2, preserve_order=true, sort_exprs=a@1 ASC,b@2 ASC NULLS LAST,c@3 ASC NULLS LAST +04)------RepartitionExec: partitioning=Hash([a@1], 2), input_partitions=2, preserve_order=true, sort_exprs=a@1 ASC, b@2 ASC NULLS LAST, c@3 ASC NULLS LAST 05)--------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1 06)----------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a0, a, b, c, d], output_ordering=[a@1 ASC, b@2 ASC NULLS LAST, c@3 ASC NULLS LAST], has_header=true 07)----CoalesceBatchesExec: target_batch_size=2 -08)------RepartitionExec: partitioning=Hash([a@1], 2), input_partitions=2, preserve_order=true, sort_exprs=a@1 ASC,b@2 ASC NULLS LAST,c@3 ASC NULLS LAST,rn1@5 ASC NULLS LAST +08)------RepartitionExec: partitioning=Hash([a@1], 2), input_partitions=2, preserve_order=true, sort_exprs=a@1 ASC, b@2 ASC NULLS LAST, c@3 ASC NULLS LAST, rn1@5 ASC NULLS LAST 09)--------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1 10)----------ProjectionExec: expr=[a0@0 as a0, a@1 as a, b@2 as b, c@3 as c, d@4 as d, row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING@5 as rn1] 11)------------BoundedWindowAggExec: wdw=[row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }], mode=[Sorted] @@ -3315,8 +3315,8 @@ logical_plan 09)--------WindowAggr: windowExpr=[[row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING]] 10)----------TableScan: annotated_data projection=[a0, a, b, c, d] physical_plan -01)SortPreservingMergeExec: [a@1 ASC,b@2 ASC NULLS LAST,c@3 ASC NULLS LAST,rn1@11 ASC NULLS LAST] -02)--SortExec: expr=[a@1 ASC,b@2 ASC NULLS LAST,c@3 ASC NULLS LAST,rn1@11 ASC NULLS LAST], preserve_partitioning=[true] +01)SortPreservingMergeExec: [a@1 ASC, b@2 ASC NULLS LAST, c@3 ASC NULLS LAST, rn1@11 ASC NULLS LAST] +02)--SortExec: expr=[a@1 ASC, b@2 ASC NULLS LAST, c@3 ASC NULLS LAST, rn1@11 ASC NULLS LAST], preserve_partitioning=[true] 03)----SortMergeJoin: join_type=Inner, on=[(a@1, a@1)] 04)------SortExec: expr=[a@1 ASC], preserve_partitioning=[true] 05)--------CoalesceBatchesExec: target_batch_size=2 diff --git a/datafusion/sqllogictest/test_files/monotonic_projection_test.slt b/datafusion/sqllogictest/test_files/monotonic_projection_test.slt index d41b78dcd3f2..abf48fac5364 
100644 --- a/datafusion/sqllogictest/test_files/monotonic_projection_test.slt +++ b/datafusion/sqllogictest/test_files/monotonic_projection_test.slt @@ -44,7 +44,7 @@ logical_plan 02)--Projection: CAST(multiple_ordered_table.a AS Int64) AS a_big, multiple_ordered_table.b 03)----TableScan: multiple_ordered_table projection=[a, b] physical_plan -01)SortPreservingMergeExec: [a_big@0 ASC NULLS LAST,b@1 ASC NULLS LAST] +01)SortPreservingMergeExec: [a_big@0 ASC NULLS LAST, b@1 ASC NULLS LAST] 02)--ProjectionExec: expr=[CAST(a@0 AS Int64) as a_big, b@1 as b] 03)----RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 04)------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a, b], output_ordering=[a@0 ASC NULLS LAST, b@1 ASC NULLS LAST], has_header=true @@ -60,7 +60,7 @@ logical_plan 02)--Projection: multiple_ordered_table.a, CAST(multiple_ordered_table.a AS Int64) AS a_big, multiple_ordered_table.b 03)----TableScan: multiple_ordered_table projection=[a, b] physical_plan -01)SortPreservingMergeExec: [a@0 ASC NULLS LAST,b@2 ASC NULLS LAST] +01)SortPreservingMergeExec: [a@0 ASC NULLS LAST, b@2 ASC NULLS LAST] 02)--ProjectionExec: expr=[a@0 as a, CAST(a@0 AS Int64) as a_big, b@1 as b] 03)----RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 04)------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a, b], output_ordering=[a@0 ASC NULLS LAST, b@1 ASC NULLS LAST], has_header=true @@ -81,7 +81,7 @@ logical_plan 02)--Projection: multiple_ordered_table.a, CAST(multiple_ordered_table.a AS Int64) AS a_big, multiple_ordered_table.b 03)----TableScan: multiple_ordered_table projection=[a, b] physical_plan -01)SortPreservingMergeExec: [a_big@1 ASC NULLS LAST,b@2 ASC NULLS LAST] +01)SortPreservingMergeExec: [a_big@1 ASC NULLS LAST, b@2 ASC NULLS LAST] 02)--ProjectionExec: expr=[a@0 as a, CAST(a@0 AS Int64) as a_big, b@1 as b] 03)----RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 04)------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a, b], output_ordering=[a@0 ASC NULLS LAST, b@1 ASC NULLS LAST], has_header=true @@ -132,8 +132,8 @@ logical_plan 02)--Projection: CAST(multiple_ordered_table.a AS Utf8) AS a_str, multiple_ordered_table.b 03)----TableScan: multiple_ordered_table projection=[a, b] physical_plan -01)SortPreservingMergeExec: [a_str@0 ASC NULLS LAST,b@1 ASC NULLS LAST] -02)--SortExec: expr=[a_str@0 ASC NULLS LAST,b@1 ASC NULLS LAST], preserve_partitioning=[true] +01)SortPreservingMergeExec: [a_str@0 ASC NULLS LAST, b@1 ASC NULLS LAST] +02)--SortExec: expr=[a_str@0 ASC NULLS LAST, b@1 ASC NULLS LAST], preserve_partitioning=[true] 03)----ProjectionExec: expr=[CAST(a@0 AS Utf8) as a_str, b@1 as b] 04)------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 05)--------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a, b], output_ordering=[a@0 ASC NULLS LAST, b@1 ASC NULLS LAST], has_header=true diff --git a/datafusion/sqllogictest/test_files/order.slt b/datafusion/sqllogictest/test_files/order.slt index dd73447f8b25..d5f0521407c5 100644 --- a/datafusion/sqllogictest/test_files/order.slt +++ b/datafusion/sqllogictest/test_files/order.slt @@ -165,7 +165,7 @@ logical_plan 03)----TableScan: aggregate_test_100 projection=[c1, c2, c3] physical_plan 01)ProjectionExec: expr=[c1@0 as c1, c2@1 as c2] -02)--SortExec: 
expr=[c2@1 ASC NULLS LAST,c3@2 ASC NULLS LAST], preserve_partitioning=[false] +02)--SortExec: expr=[c2@1 ASC NULLS LAST, c3@2 ASC NULLS LAST], preserve_partitioning=[false] 03)----CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c1, c2, c3], has_header=true query II @@ -691,7 +691,7 @@ logical_plan 01)Sort: t1.id DESC NULLS FIRST, t1.name ASC NULLS LAST 02)--TableScan: t1 projection=[id, name] physical_plan -01)SortExec: expr=[id@0 DESC,name@1 ASC NULLS LAST], preserve_partitioning=[false] +01)SortExec: expr=[id@0 DESC, name@1 ASC NULLS LAST], preserve_partitioning=[false] 02)--MemoryExec: partitions=1, partition_sizes=[1] query IT @@ -710,7 +710,7 @@ logical_plan 01)Sort: t1.id ASC NULLS LAST, t1.name ASC NULLS LAST 02)--TableScan: t1 projection=[id, name] physical_plan -01)SortExec: expr=[id@0 ASC NULLS LAST,name@1 ASC NULLS LAST], preserve_partitioning=[false] +01)SortExec: expr=[id@0 ASC NULLS LAST, name@1 ASC NULLS LAST], preserve_partitioning=[false] 02)--MemoryExec: partitions=1, partition_sizes=[1] @@ -776,8 +776,8 @@ logical_plan 13)------------Projection: column1 AS t 14)--------------Values: (Int64(0)), (Int64(1)) physical_plan -01)SortPreservingMergeExec: [m@0 ASC NULLS LAST,t@1 ASC NULLS LAST] -02)--SortExec: expr=[m@0 ASC NULLS LAST,t@1 ASC NULLS LAST], preserve_partitioning=[true] +01)SortPreservingMergeExec: [m@0 ASC NULLS LAST, t@1 ASC NULLS LAST] +02)--SortExec: expr=[m@0 ASC NULLS LAST, t@1 ASC NULLS LAST], preserve_partitioning=[true] 03)----InterleaveExec 04)------ProjectionExec: expr=[0 as m, t@0 as t] 05)--------AggregateExec: mode=FinalPartitioned, gby=[t@0 as t], aggr=[] @@ -1237,12 +1237,12 @@ logical_plan 09)----------TableScan: ordered_table projection=[a0, b, c, d] physical_plan 01)ProjectionExec: expr=[b@0 as b, c@1 as c, a@2 as a, a0@3 as a0] -02)--SortPreservingMergeExec: [d@4 ASC NULLS LAST,c@1 ASC NULLS LAST,a@2 ASC NULLS LAST,a0@3 ASC NULLS LAST,b@0 ASC NULLS LAST], fetch=2 +02)--SortPreservingMergeExec: [d@4 ASC NULLS LAST, c@1 ASC NULLS LAST, a@2 ASC NULLS LAST, a0@3 ASC NULLS LAST, b@0 ASC NULLS LAST], fetch=2 03)----UnionExec -04)------SortExec: TopK(fetch=2), expr=[d@4 ASC NULLS LAST,c@1 ASC NULLS LAST,a@2 ASC NULLS LAST,b@0 ASC NULLS LAST], preserve_partitioning=[false] +04)------SortExec: TopK(fetch=2), expr=[d@4 ASC NULLS LAST, c@1 ASC NULLS LAST, a@2 ASC NULLS LAST, b@0 ASC NULLS LAST], preserve_partitioning=[false] 05)--------ProjectionExec: expr=[b@1 as b, c@2 as c, a@0 as a, NULL as a0, d@3 as d] 06)----------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a, b, c, d], output_ordering=[c@2 ASC NULLS LAST], has_header=true -07)------SortExec: TopK(fetch=2), expr=[d@4 ASC NULLS LAST,c@1 ASC NULLS LAST,a0@3 ASC NULLS LAST,b@0 ASC NULLS LAST], preserve_partitioning=[false] +07)------SortExec: TopK(fetch=2), expr=[d@4 ASC NULLS LAST, c@1 ASC NULLS LAST, a0@3 ASC NULLS LAST, b@0 ASC NULLS LAST], preserve_partitioning=[false] 08)--------ProjectionExec: expr=[b@1 as b, c@2 as c, NULL as a, a0@0 as a0, d@3 as d] 09)----------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a0, b, c, d], output_ordering=[c@2 ASC NULLS LAST], has_header=true diff --git a/datafusion/sqllogictest/test_files/parquet.slt b/datafusion/sqllogictest/test_files/parquet.slt index 656cfcbe076d..ed963466fca6 100644 --- a/datafusion/sqllogictest/test_files/parquet.slt +++ b/datafusion/sqllogictest/test_files/parquet.slt 
@@ -89,8 +89,8 @@ logical_plan 01)Sort: test_table.string_col ASC NULLS LAST, test_table.int_col ASC NULLS LAST 02)--TableScan: test_table projection=[int_col, string_col] physical_plan -01)SortPreservingMergeExec: [string_col@1 ASC NULLS LAST,int_col@0 ASC NULLS LAST] -02)--SortExec: expr=[string_col@1 ASC NULLS LAST,int_col@0 ASC NULLS LAST], preserve_partitioning=[true] +01)SortPreservingMergeExec: [string_col@1 ASC NULLS LAST, int_col@0 ASC NULLS LAST] +02)--SortExec: expr=[string_col@1 ASC NULLS LAST, int_col@0 ASC NULLS LAST], preserve_partitioning=[true] 03)----ParquetExec: file_groups={2 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet/test_table/0.parquet], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet/test_table/1.parquet]]}, projection=[int_col, string_col] # Tear down test_table: @@ -119,7 +119,7 @@ logical_plan 01)Sort: test_table.string_col ASC NULLS LAST, test_table.int_col ASC NULLS LAST 02)--TableScan: test_table projection=[int_col, string_col] physical_plan -01)SortPreservingMergeExec: [string_col@1 ASC NULLS LAST,int_col@0 ASC NULLS LAST] +01)SortPreservingMergeExec: [string_col@1 ASC NULLS LAST, int_col@0 ASC NULLS LAST] 02)--ParquetExec: file_groups={2 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet/test_table/0.parquet], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet/test_table/1.parquet]]}, projection=[int_col, string_col], output_ordering=[string_col@1 ASC NULLS LAST, int_col@0 ASC NULLS LAST] # Add another file to the directory underlying test_table @@ -141,8 +141,8 @@ logical_plan 01)Sort: test_table.string_col ASC NULLS LAST, test_table.int_col ASC NULLS LAST 02)--TableScan: test_table projection=[int_col, string_col] physical_plan -01)SortPreservingMergeExec: [string_col@1 ASC NULLS LAST,int_col@0 ASC NULLS LAST] -02)--SortExec: expr=[string_col@1 ASC NULLS LAST,int_col@0 ASC NULLS LAST], preserve_partitioning=[true] +01)SortPreservingMergeExec: [string_col@1 ASC NULLS LAST, int_col@0 ASC NULLS LAST] +02)--SortExec: expr=[string_col@1 ASC NULLS LAST, int_col@0 ASC NULLS LAST], preserve_partitioning=[true] 03)----ParquetExec: file_groups={2 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet/test_table/0.parquet, WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet/test_table/1.parquet], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet/test_table/2.parquet]]}, projection=[int_col, string_col] diff --git a/datafusion/sqllogictest/test_files/select.slt b/datafusion/sqllogictest/test_files/select.slt index d7ff51011b65..c096f6e692af 100644 --- a/datafusion/sqllogictest/test_files/select.slt +++ b/datafusion/sqllogictest/test_files/select.slt @@ -1439,7 +1439,7 @@ logical_plan 02)--Filter: annotated_data_finite2.a = Int32(0) 03)----TableScan: annotated_data_finite2 projection=[a0, a, b, c, d], partial_filters=[annotated_data_finite2.a = Int32(0)] physical_plan -01)SortPreservingMergeExec: [b@2 ASC NULLS LAST,c@3 ASC NULLS LAST] +01)SortPreservingMergeExec: [b@2 ASC NULLS LAST, c@3 ASC NULLS LAST] 02)--CoalesceBatchesExec: target_batch_size=8192 03)----FilterExec: a@1 = 0 04)------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1 @@ -1481,7 +1481,7 @@ logical_plan 02)--Filter: annotated_data_finite2.a = Int32(0) AND annotated_data_finite2.b = Int32(0) 03)----TableScan: annotated_data_finite2 projection=[a0, a, b, c, d], partial_filters=[annotated_data_finite2.a = Int32(0), annotated_data_finite2.b = 
Int32(0)] physical_plan -01)SortPreservingMergeExec: [b@2 ASC NULLS LAST,c@3 ASC NULLS LAST] +01)SortPreservingMergeExec: [b@2 ASC NULLS LAST, c@3 ASC NULLS LAST] 02)--CoalesceBatchesExec: target_batch_size=8192 03)----FilterExec: a@1 = 0 AND b@2 = 0 04)------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1 @@ -1502,7 +1502,7 @@ logical_plan 02)--Filter: annotated_data_finite2.a = Int32(0) AND annotated_data_finite2.b = Int32(0) 03)----TableScan: annotated_data_finite2 projection=[a0, a, b, c, d], partial_filters=[annotated_data_finite2.a = Int32(0), annotated_data_finite2.b = Int32(0)] physical_plan -01)SortPreservingMergeExec: [a@1 ASC NULLS LAST,b@2 ASC NULLS LAST,c@3 ASC NULLS LAST] +01)SortPreservingMergeExec: [a@1 ASC NULLS LAST, b@2 ASC NULLS LAST, c@3 ASC NULLS LAST] 02)--CoalesceBatchesExec: target_batch_size=8192 03)----FilterExec: a@1 = 0 AND b@2 = 0 04)------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1 @@ -1553,7 +1553,7 @@ physical_plan 04)------AggregateExec: mode=Partial, gby=[c2@0 as c2], aggr=[count(*)] 05)--------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1 06)----------ProjectionExec: expr=[c2@0 as c2] -07)------------SortExec: TopK(fetch=4), expr=[c1@1 ASC NULLS LAST,c2@0 ASC NULLS LAST], preserve_partitioning=[false] +07)------------SortExec: TopK(fetch=4), expr=[c1@1 ASC NULLS LAST, c2@0 ASC NULLS LAST], preserve_partitioning=[false] 08)--------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c2, c1], has_header=true # FilterExec can track equality of non-column expressions. diff --git a/datafusion/sqllogictest/test_files/subquery_sort.slt b/datafusion/sqllogictest/test_files/subquery_sort.slt index e4360a9269ca..a3717dd838d6 100644 --- a/datafusion/sqllogictest/test_files/subquery_sort.slt +++ b/datafusion/sqllogictest/test_files/subquery_sort.slt @@ -65,8 +65,8 @@ logical_plan 05)--------TableScan: sink_table projection=[c1, c2, c3, c9] physical_plan 01)ProjectionExec: expr=[c1@0 as c1, c2@1 as c2] -02)--SortExec: expr=[c1@0 ASC NULLS LAST,c3@2 ASC NULLS LAST,c9@3 ASC NULLS LAST], preserve_partitioning=[false] -03)----SortExec: TopK(fetch=2), expr=[c1@0 DESC,c3@2 ASC NULLS LAST], preserve_partitioning=[false] +02)--SortExec: expr=[c1@0 ASC NULLS LAST, c3@2 ASC NULLS LAST, c9@3 ASC NULLS LAST], preserve_partitioning=[false] +03)----SortExec: TopK(fetch=2), expr=[c1@0 DESC, c3@2 ASC NULLS LAST], preserve_partitioning=[false] 04)------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c1, c2, c3, c9], has_header=true @@ -98,7 +98,7 @@ logical_plan 07)------------TableScan: sink_table projection=[c1, c3, c9] physical_plan 01)ProjectionExec: expr=[c1@0 as c1, r@1 as r] -02)--SortExec: TopK(fetch=2), expr=[c1@0 ASC NULLS LAST,c3@2 ASC NULLS LAST,c9@3 ASC NULLS LAST], preserve_partitioning=[false] +02)--SortExec: TopK(fetch=2), expr=[c1@0 ASC NULLS LAST, c3@2 ASC NULLS LAST, c9@3 ASC NULLS LAST], preserve_partitioning=[false] 03)----ProjectionExec: expr=[c1@0 as c1, rank() ORDER BY [sink_table.c1 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@3 as r, c3@1 as c3, c9@2 as c9] 04)------BoundedWindowAggExec: wdw=[rank() ORDER BY [sink_table.c1 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "rank() ORDER BY [sink_table.c1 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: false, dict_id: 0, 
dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Utf8(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted] 05)--------SortExec: expr=[c1@0 DESC], preserve_partitioning=[false] @@ -118,8 +118,8 @@ logical_plan 07)------------TableScan: sink_table projection=[c1, c2, c3, c9] physical_plan 01)ProjectionExec: expr=[c1@0 as c1, c2@1 as c2] -02)--SortPreservingMergeExec: [c1@0 ASC NULLS LAST,c3@2 DESC,c9@3 ASC NULLS LAST] -03)----SortExec: expr=[c1@0 ASC NULLS LAST,c3@2 DESC,c9@3 ASC NULLS LAST], preserve_partitioning=[true] +02)--SortPreservingMergeExec: [c1@0 ASC NULLS LAST, c3@2 DESC, c9@3 ASC NULLS LAST] +03)----SortExec: expr=[c1@0 ASC NULLS LAST, c3@2 DESC, c9@3 ASC NULLS LAST], preserve_partitioning=[true] 04)------ProjectionExec: expr=[first_value(sink_table.c1) ORDER BY [sink_table.c1 ASC NULLS LAST, sink_table.c3 DESC NULLS FIRST, sink_table.c9 ASC NULLS LAST]@1 as c1, first_value(sink_table.c2) ORDER BY [sink_table.c1 ASC NULLS LAST, sink_table.c3 DESC NULLS FIRST, sink_table.c9 ASC NULLS LAST]@2 as c2, first_value(sink_table.c3) ORDER BY [sink_table.c1 ASC NULLS LAST, sink_table.c3 DESC NULLS FIRST, sink_table.c9 ASC NULLS LAST]@3 as c3, first_value(sink_table.c9) ORDER BY [sink_table.c1 ASC NULLS LAST, sink_table.c3 DESC NULLS FIRST, sink_table.c9 ASC NULLS LAST]@4 as c9] 05)--------AggregateExec: mode=FinalPartitioned, gby=[c1@0 as c1], aggr=[first_value(sink_table.c1) ORDER BY [sink_table.c1 ASC NULLS LAST, sink_table.c3 DESC NULLS FIRST, sink_table.c9 ASC NULLS LAST], first_value(sink_table.c2) ORDER BY [sink_table.c1 ASC NULLS LAST, sink_table.c3 DESC NULLS FIRST, sink_table.c9 ASC NULLS LAST], first_value(sink_table.c3) ORDER BY [sink_table.c1 ASC NULLS LAST, sink_table.c3 DESC NULLS FIRST, sink_table.c9 ASC NULLS LAST], first_value(sink_table.c9) ORDER BY [sink_table.c1 ASC NULLS LAST, sink_table.c3 DESC NULLS FIRST, sink_table.c9 ASC NULLS LAST]] 06)----------CoalesceBatchesExec: target_batch_size=8192 diff --git a/datafusion/sqllogictest/test_files/tpch/q1.slt.part b/datafusion/sqllogictest/test_files/tpch/q1.slt.part index 8cfd25d26c07..4d4323e93e9e 100644 --- a/datafusion/sqllogictest/test_files/tpch/q1.slt.part +++ b/datafusion/sqllogictest/test_files/tpch/q1.slt.part @@ -47,8 +47,8 @@ logical_plan 05)--------Filter: lineitem.l_shipdate <= Date32("1998-09-02") 06)----------TableScan: lineitem projection=[l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate], partial_filters=[lineitem.l_shipdate <= Date32("1998-09-02")] physical_plan -01)SortPreservingMergeExec: [l_returnflag@0 ASC NULLS LAST,l_linestatus@1 ASC NULLS LAST] -02)--SortExec: expr=[l_returnflag@0 ASC NULLS LAST,l_linestatus@1 ASC NULLS LAST], preserve_partitioning=[true] +01)SortPreservingMergeExec: [l_returnflag@0 ASC NULLS LAST, l_linestatus@1 ASC NULLS LAST] +02)--SortExec: expr=[l_returnflag@0 ASC NULLS LAST, l_linestatus@1 ASC NULLS LAST], preserve_partitioning=[true] 03)----ProjectionExec: expr=[l_returnflag@0 as l_returnflag, l_linestatus@1 as l_linestatus, sum(lineitem.l_quantity)@2 as sum_qty, sum(lineitem.l_extendedprice)@3 as sum_base_price, sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@4 as sum_disc_price, sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount * Int64(1) + lineitem.l_tax)@5 as sum_charge, avg(lineitem.l_quantity)@6 as avg_qty, avg(lineitem.l_extendedprice)@7 as avg_price, avg(lineitem.l_discount)@8 as avg_disc, count(*)@9 as count_order] 04)------AggregateExec: 
mode=FinalPartitioned, gby=[l_returnflag@0 as l_returnflag, l_linestatus@1 as l_linestatus], aggr=[sum(lineitem.l_quantity), sum(lineitem.l_extendedprice), sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount), sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount * Int64(1) + lineitem.l_tax), avg(lineitem.l_quantity), avg(lineitem.l_extendedprice), avg(lineitem.l_discount), count(*)] 05)--------CoalesceBatchesExec: target_batch_size=8192 diff --git a/datafusion/sqllogictest/test_files/tpch/q13.slt.part b/datafusion/sqllogictest/test_files/tpch/q13.slt.part index bb32fb209700..2a9fb12a31c2 100644 --- a/datafusion/sqllogictest/test_files/tpch/q13.slt.part +++ b/datafusion/sqllogictest/test_files/tpch/q13.slt.part @@ -53,8 +53,8 @@ logical_plan 11)------------------Filter: orders.o_comment NOT LIKE Utf8("%special%requests%") 12)--------------------TableScan: orders projection=[o_orderkey, o_custkey, o_comment], partial_filters=[orders.o_comment NOT LIKE Utf8("%special%requests%")] physical_plan -01)SortPreservingMergeExec: [custdist@1 DESC,c_count@0 DESC], fetch=10 -02)--SortExec: TopK(fetch=10), expr=[custdist@1 DESC,c_count@0 DESC], preserve_partitioning=[true] +01)SortPreservingMergeExec: [custdist@1 DESC, c_count@0 DESC], fetch=10 +02)--SortExec: TopK(fetch=10), expr=[custdist@1 DESC, c_count@0 DESC], preserve_partitioning=[true] 03)----ProjectionExec: expr=[c_count@0 as c_count, count(*)@1 as custdist] 04)------AggregateExec: mode=FinalPartitioned, gby=[c_count@0 as c_count], aggr=[count(*)] 05)--------CoalesceBatchesExec: target_batch_size=8192 diff --git a/datafusion/sqllogictest/test_files/tpch/q16.slt.part b/datafusion/sqllogictest/test_files/tpch/q16.slt.part index 8058371764f2..6b2c2f7fdc3e 100644 --- a/datafusion/sqllogictest/test_files/tpch/q16.slt.part +++ b/datafusion/sqllogictest/test_files/tpch/q16.slt.part @@ -65,8 +65,8 @@ logical_plan 13)--------------Filter: supplier.s_comment LIKE Utf8("%Customer%Complaints%") 14)----------------TableScan: supplier projection=[s_suppkey, s_comment], partial_filters=[supplier.s_comment LIKE Utf8("%Customer%Complaints%")] physical_plan -01)SortPreservingMergeExec: [supplier_cnt@3 DESC,p_brand@0 ASC NULLS LAST,p_type@1 ASC NULLS LAST,p_size@2 ASC NULLS LAST], fetch=10 -02)--SortExec: TopK(fetch=10), expr=[supplier_cnt@3 DESC,p_brand@0 ASC NULLS LAST,p_type@1 ASC NULLS LAST,p_size@2 ASC NULLS LAST], preserve_partitioning=[true] +01)SortPreservingMergeExec: [supplier_cnt@3 DESC, p_brand@0 ASC NULLS LAST, p_type@1 ASC NULLS LAST, p_size@2 ASC NULLS LAST], fetch=10 +02)--SortExec: TopK(fetch=10), expr=[supplier_cnt@3 DESC, p_brand@0 ASC NULLS LAST, p_type@1 ASC NULLS LAST, p_size@2 ASC NULLS LAST], preserve_partitioning=[true] 03)----ProjectionExec: expr=[p_brand@0 as p_brand, p_type@1 as p_type, p_size@2 as p_size, count(alias1)@3 as supplier_cnt] 04)------AggregateExec: mode=FinalPartitioned, gby=[p_brand@0 as p_brand, p_type@1 as p_type, p_size@2 as p_size], aggr=[count(alias1)] 05)--------CoalesceBatchesExec: target_batch_size=8192 diff --git a/datafusion/sqllogictest/test_files/tpch/q18.slt.part b/datafusion/sqllogictest/test_files/tpch/q18.slt.part index e78b0d87f651..c80352c5d36a 100644 --- a/datafusion/sqllogictest/test_files/tpch/q18.slt.part +++ b/datafusion/sqllogictest/test_files/tpch/q18.slt.part @@ -67,8 +67,8 @@ logical_plan 14)------------Aggregate: groupBy=[[lineitem.l_orderkey]], aggr=[[sum(lineitem.l_quantity)]] 15)--------------TableScan: lineitem projection=[l_orderkey, l_quantity] physical_plan 
-01)SortPreservingMergeExec: [o_totalprice@4 DESC,o_orderdate@3 ASC NULLS LAST] -02)--SortExec: expr=[o_totalprice@4 DESC,o_orderdate@3 ASC NULLS LAST], preserve_partitioning=[true] +01)SortPreservingMergeExec: [o_totalprice@4 DESC, o_orderdate@3 ASC NULLS LAST] +02)--SortExec: expr=[o_totalprice@4 DESC, o_orderdate@3 ASC NULLS LAST], preserve_partitioning=[true] 03)----AggregateExec: mode=FinalPartitioned, gby=[c_name@0 as c_name, c_custkey@1 as c_custkey, o_orderkey@2 as o_orderkey, o_orderdate@3 as o_orderdate, o_totalprice@4 as o_totalprice], aggr=[sum(lineitem.l_quantity)] 04)------CoalesceBatchesExec: target_batch_size=8192 05)--------RepartitionExec: partitioning=Hash([c_name@0, c_custkey@1, o_orderkey@2, o_orderdate@3, o_totalprice@4], 4), input_partitions=4 diff --git a/datafusion/sqllogictest/test_files/tpch/q2.slt.part b/datafusion/sqllogictest/test_files/tpch/q2.slt.part index 17f3b78a089d..23ffa0d226b8 100644 --- a/datafusion/sqllogictest/test_files/tpch/q2.slt.part +++ b/datafusion/sqllogictest/test_files/tpch/q2.slt.part @@ -99,8 +99,8 @@ logical_plan 34)------------------Filter: region.r_name = Utf8("EUROPE") 35)--------------------TableScan: region projection=[r_regionkey, r_name], partial_filters=[region.r_name = Utf8("EUROPE")] physical_plan -01)SortPreservingMergeExec: [s_acctbal@0 DESC,n_name@2 ASC NULLS LAST,s_name@1 ASC NULLS LAST,p_partkey@3 ASC NULLS LAST], fetch=10 -02)--SortExec: TopK(fetch=10), expr=[s_acctbal@0 DESC,n_name@2 ASC NULLS LAST,s_name@1 ASC NULLS LAST,p_partkey@3 ASC NULLS LAST], preserve_partitioning=[true] +01)SortPreservingMergeExec: [s_acctbal@0 DESC, n_name@2 ASC NULLS LAST, s_name@1 ASC NULLS LAST, p_partkey@3 ASC NULLS LAST], fetch=10 +02)--SortExec: TopK(fetch=10), expr=[s_acctbal@0 DESC, n_name@2 ASC NULLS LAST, s_name@1 ASC NULLS LAST, p_partkey@3 ASC NULLS LAST], preserve_partitioning=[true] 03)----ProjectionExec: expr=[s_acctbal@5 as s_acctbal, s_name@2 as s_name, n_name@7 as n_name, p_partkey@0 as p_partkey, p_mfgr@1 as p_mfgr, s_address@3 as s_address, s_phone@4 as s_phone, s_comment@6 as s_comment] 04)------CoalesceBatchesExec: target_batch_size=8192 05)--------HashJoinExec: mode=Partitioned, join_type=Inner, on=[(p_partkey@0, ps_partkey@1), (ps_supplycost@7, min(partsupp.ps_supplycost)@0)], projection=[p_partkey@0, p_mfgr@1, s_name@2, s_address@3, s_phone@4, s_acctbal@5, s_comment@6, n_name@8] diff --git a/datafusion/sqllogictest/test_files/tpch/q21.slt.part b/datafusion/sqllogictest/test_files/tpch/q21.slt.part index 5cf069ec7248..93dcd4c68052 100644 --- a/datafusion/sqllogictest/test_files/tpch/q21.slt.part +++ b/datafusion/sqllogictest/test_files/tpch/q21.slt.part @@ -90,8 +90,8 @@ logical_plan 30)----------------Filter: lineitem.l_receiptdate > lineitem.l_commitdate 31)------------------TableScan: lineitem projection=[l_orderkey, l_suppkey, l_commitdate, l_receiptdate], partial_filters=[lineitem.l_receiptdate > lineitem.l_commitdate] physical_plan -01)SortPreservingMergeExec: [numwait@1 DESC,s_name@0 ASC NULLS LAST] -02)--SortExec: expr=[numwait@1 DESC,s_name@0 ASC NULLS LAST], preserve_partitioning=[true] +01)SortPreservingMergeExec: [numwait@1 DESC, s_name@0 ASC NULLS LAST] +02)--SortExec: expr=[numwait@1 DESC, s_name@0 ASC NULLS LAST], preserve_partitioning=[true] 03)----ProjectionExec: expr=[s_name@0 as s_name, count(*)@1 as numwait] 04)------AggregateExec: mode=FinalPartitioned, gby=[s_name@0 as s_name], aggr=[count(*)] 05)--------CoalesceBatchesExec: target_batch_size=8192 diff --git 
a/datafusion/sqllogictest/test_files/tpch/q3.slt.part b/datafusion/sqllogictest/test_files/tpch/q3.slt.part index 16a1c2b6ebb1..289e9c7732bb 100644 --- a/datafusion/sqllogictest/test_files/tpch/q3.slt.part +++ b/datafusion/sqllogictest/test_files/tpch/q3.slt.part @@ -58,8 +58,8 @@ logical_plan 14)------------Filter: lineitem.l_shipdate > Date32("1995-03-15") 15)--------------TableScan: lineitem projection=[l_orderkey, l_extendedprice, l_discount, l_shipdate], partial_filters=[lineitem.l_shipdate > Date32("1995-03-15")] physical_plan -01)SortPreservingMergeExec: [revenue@1 DESC,o_orderdate@2 ASC NULLS LAST], fetch=10 -02)--SortExec: TopK(fetch=10), expr=[revenue@1 DESC,o_orderdate@2 ASC NULLS LAST], preserve_partitioning=[true] +01)SortPreservingMergeExec: [revenue@1 DESC, o_orderdate@2 ASC NULLS LAST], fetch=10 +02)--SortExec: TopK(fetch=10), expr=[revenue@1 DESC, o_orderdate@2 ASC NULLS LAST], preserve_partitioning=[true] 03)----ProjectionExec: expr=[l_orderkey@0 as l_orderkey, sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@3 as revenue, o_orderdate@1 as o_orderdate, o_shippriority@2 as o_shippriority] 04)------AggregateExec: mode=FinalPartitioned, gby=[l_orderkey@0 as l_orderkey, o_orderdate@1 as o_orderdate, o_shippriority@2 as o_shippriority], aggr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] 05)--------CoalesceBatchesExec: target_batch_size=8192 diff --git a/datafusion/sqllogictest/test_files/tpch/q7.slt.part b/datafusion/sqllogictest/test_files/tpch/q7.slt.part index 5a6cef5311d4..a16af4710478 100644 --- a/datafusion/sqllogictest/test_files/tpch/q7.slt.part +++ b/datafusion/sqllogictest/test_files/tpch/q7.slt.part @@ -84,8 +84,8 @@ logical_plan 24)--------------Filter: nation.n_name = Utf8("GERMANY") OR nation.n_name = Utf8("FRANCE") 25)----------------TableScan: nation projection=[n_nationkey, n_name], partial_filters=[nation.n_name = Utf8("GERMANY") OR nation.n_name = Utf8("FRANCE")] physical_plan -01)SortPreservingMergeExec: [supp_nation@0 ASC NULLS LAST,cust_nation@1 ASC NULLS LAST,l_year@2 ASC NULLS LAST] -02)--SortExec: expr=[supp_nation@0 ASC NULLS LAST,cust_nation@1 ASC NULLS LAST,l_year@2 ASC NULLS LAST], preserve_partitioning=[true] +01)SortPreservingMergeExec: [supp_nation@0 ASC NULLS LAST, cust_nation@1 ASC NULLS LAST, l_year@2 ASC NULLS LAST] +02)--SortExec: expr=[supp_nation@0 ASC NULLS LAST, cust_nation@1 ASC NULLS LAST, l_year@2 ASC NULLS LAST], preserve_partitioning=[true] 03)----ProjectionExec: expr=[supp_nation@0 as supp_nation, cust_nation@1 as cust_nation, l_year@2 as l_year, sum(shipping.volume)@3 as revenue] 04)------AggregateExec: mode=FinalPartitioned, gby=[supp_nation@0 as supp_nation, cust_nation@1 as cust_nation, l_year@2 as l_year], aggr=[sum(shipping.volume)] 05)--------CoalesceBatchesExec: target_batch_size=8192 diff --git a/datafusion/sqllogictest/test_files/tpch/q9.slt.part b/datafusion/sqllogictest/test_files/tpch/q9.slt.part index b3631f07cc8f..c4910beb842b 100644 --- a/datafusion/sqllogictest/test_files/tpch/q9.slt.part +++ b/datafusion/sqllogictest/test_files/tpch/q9.slt.part @@ -75,8 +75,8 @@ logical_plan 21)----------------TableScan: orders projection=[o_orderkey, o_orderdate] 22)------------TableScan: nation projection=[n_nationkey, n_name] physical_plan -01)SortPreservingMergeExec: [nation@0 ASC NULLS LAST,o_year@1 DESC], fetch=10 -02)--SortExec: TopK(fetch=10), expr=[nation@0 ASC NULLS LAST,o_year@1 DESC], preserve_partitioning=[true] +01)SortPreservingMergeExec: [nation@0 ASC NULLS LAST, o_year@1 DESC], 
fetch=10 +02)--SortExec: TopK(fetch=10), expr=[nation@0 ASC NULLS LAST, o_year@1 DESC], preserve_partitioning=[true] 03)----ProjectionExec: expr=[nation@0 as nation, o_year@1 as o_year, sum(profit.amount)@2 as sum_profit] 04)------AggregateExec: mode=FinalPartitioned, gby=[nation@0 as nation, o_year@1 as o_year], aggr=[sum(profit.amount)] 05)--------CoalesceBatchesExec: target_batch_size=8192 diff --git a/datafusion/sqllogictest/test_files/window.slt b/datafusion/sqllogictest/test_files/window.slt index 29ff62ab34f9..d593a985c458 100644 --- a/datafusion/sqllogictest/test_files/window.slt +++ b/datafusion/sqllogictest/test_files/window.slt @@ -361,7 +361,7 @@ physical_plan 03)----AggregateExec: mode=SinglePartitioned, gby=[b@2 as b], aggr=[max(d.a), max(d.seq)], ordering_mode=Sorted 04)------ProjectionExec: expr=[row_number() PARTITION BY [s.b] ORDER BY [s.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@2 as seq, a@0 as a, b@1 as b] 05)--------BoundedWindowAggExec: wdw=[row_number() PARTITION BY [s.b] ORDER BY [s.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "row_number() PARTITION BY [s.b] ORDER BY [s.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int64(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted] -06)----------SortExec: expr=[b@1 ASC NULLS LAST,a@0 ASC NULLS LAST], preserve_partitioning=[true] +06)----------SortExec: expr=[b@1 ASC NULLS LAST, a@0 ASC NULLS LAST], preserve_partitioning=[true] 07)------------CoalesceBatchesExec: target_batch_size=8192 08)--------------RepartitionExec: partitioning=Hash([b@1], 4), input_partitions=4 09)----------------UnionExec @@ -1244,7 +1244,7 @@ physical_plan 02)--BoundedWindowAggExec: wdw=[sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted] 03)----ProjectionExec: expr=[c9@1 as c9, sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c8 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@2 as sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c8 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW] 04)------BoundedWindowAggExec: wdw=[sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c8 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c8 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted] -05)--------SortExec: expr=[c9@1 ASC NULLS LAST,c8@0 ASC NULLS LAST], preserve_partitioning=[false] +05)--------SortExec: expr=[c9@1 ASC NULLS LAST, c8@0 ASC NULLS LAST], 
preserve_partitioning=[false] 06)----------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c8, c9], has_header=true @@ -1265,7 +1265,7 @@ physical_plan 02)--WindowAggExec: wdw=[sum(aggregate_test_100.c9) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "sum(aggregate_test_100.c9) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] 03)----BoundedWindowAggExec: wdw=[max(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "max(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int8(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted] 04)------BoundedWindowAggExec: wdw=[min(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c2 ASC NULLS LAST, aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "min(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c2 ASC NULLS LAST, aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int8(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted] -05)--------SortExec: expr=[c2@0 ASC NULLS LAST,c9@1 ASC NULLS LAST], preserve_partitioning=[false] +05)--------SortExec: expr=[c2@0 ASC NULLS LAST, c9@1 ASC NULLS LAST], preserve_partitioning=[false] 06)----------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c2, c9], has_header=true @@ -1288,9 +1288,9 @@ physical_plan 02)--ProjectionExec: expr=[c2@0 as c2, max(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@3 as max(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW, sum(aggregate_test_100.c9) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING@4 as sum(aggregate_test_100.c9) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING, min(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c2 ASC NULLS LAST, aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@2 as min(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c2 ASC NULLS LAST, aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW] 03)----WindowAggExec: wdw=[sum(aggregate_test_100.c9) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "sum(aggregate_test_100.c9) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] 04)------BoundedWindowAggExec: wdw=[max(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS 
LAST, aggregate_test_100.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "max(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted] -05)--------SortExec: expr=[c9@1 ASC NULLS LAST,c2@0 ASC NULLS LAST], preserve_partitioning=[false] +05)--------SortExec: expr=[c9@1 ASC NULLS LAST, c2@0 ASC NULLS LAST], preserve_partitioning=[false] 06)----------BoundedWindowAggExec: wdw=[min(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c2 ASC NULLS LAST, aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "min(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c2 ASC NULLS LAST, aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int8(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted] -07)------------SortExec: expr=[c2@0 ASC NULLS LAST,c9@1 ASC NULLS LAST], preserve_partitioning=[false] +07)------------SortExec: expr=[c2@0 ASC NULLS LAST, c9@1 ASC NULLS LAST], preserve_partitioning=[false] 08)--------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c2, c9], has_header=true # test_window_partition_by_order_by @@ -1312,12 +1312,12 @@ logical_plan physical_plan 01)ProjectionExec: expr=[sum(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1, aggregate_test_100.c2] ORDER BY [aggregate_test_100.c2 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING@2 as sum(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1, aggregate_test_100.c2] ORDER BY [aggregate_test_100.c2 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING, count(*) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c2 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING@3 as count(*) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c2 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING] 02)--BoundedWindowAggExec: wdw=[count(*) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c2 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "count(*) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c2 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(1)), is_causal: false }], mode=[Sorted] -03)----SortExec: expr=[c1@0 ASC NULLS LAST,c2@1 ASC NULLS LAST], preserve_partitioning=[true] +03)----SortExec: expr=[c1@0 ASC NULLS LAST, c2@1 ASC NULLS LAST], preserve_partitioning=[true] 04)------CoalesceBatchesExec: target_batch_size=4096 05)--------RepartitionExec: partitioning=Hash([c1@0], 2), input_partitions=2 06)----------ProjectionExec: expr=[c1@0 as c1, c2@1 as c2, sum(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1, aggregate_test_100.c2] ORDER BY [aggregate_test_100.c2 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING@3 as sum(aggregate_test_100.c4) PARTITION BY 
[aggregate_test_100.c1, aggregate_test_100.c2] ORDER BY [aggregate_test_100.c2 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING] 07)------------BoundedWindowAggExec: wdw=[sum(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1, aggregate_test_100.c2] ORDER BY [aggregate_test_100.c2 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "sum(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1, aggregate_test_100.c2] ORDER BY [aggregate_test_100.c2 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(1)), is_causal: false }], mode=[Sorted] -08)--------------SortExec: expr=[c1@0 ASC NULLS LAST,c2@1 ASC NULLS LAST], preserve_partitioning=[true] +08)--------------SortExec: expr=[c1@0 ASC NULLS LAST, c2@1 ASC NULLS LAST], preserve_partitioning=[true] 09)----------------CoalesceBatchesExec: target_batch_size=4096 10)------------------RepartitionExec: partitioning=Hash([c1@0, c2@1], 2), input_partitions=2 11)--------------------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1 @@ -1470,10 +1470,10 @@ physical_plan 01)ProjectionExec: expr=[c9@2 as c9, sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c1 ASC NULLS LAST, aggregate_test_100.c2 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING@5 as sum1, sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST, aggregate_test_100.c1 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING@3 as sum2, row_number() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING@4 as rn2] 02)--GlobalLimitExec: skip=0, fetch=5 03)----BoundedWindowAggExec: wdw=[sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c1 ASC NULLS LAST, aggregate_test_100.c2 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING: Ok(Field { name: "sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c1 ASC NULLS LAST, aggregate_test_100.c2 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(5)), is_causal: false }], mode=[Sorted] -04)------SortExec: expr=[c9@2 ASC NULLS LAST,c1@0 ASC NULLS LAST,c2@1 ASC NULLS LAST], preserve_partitioning=[false] +04)------SortExec: expr=[c9@2 ASC NULLS LAST, c1@0 ASC NULLS LAST, c2@1 ASC NULLS LAST], preserve_partitioning=[false] 05)--------BoundedWindowAggExec: wdw=[row_number() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING: Ok(Field { name: "row_number() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(5)), is_causal: false }], mode=[Sorted] 06)----------BoundedWindowAggExec: wdw=[sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST, aggregate_test_100.c1 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING: Ok(Field { name: "sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST, aggregate_test_100.c1 DESC 
NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(5)), is_causal: false }], mode=[Sorted] -07)------------SortExec: expr=[c9@2 DESC,c1@0 DESC], preserve_partitioning=[false] +07)------------SortExec: expr=[c9@2 DESC, c1@0 DESC], preserve_partitioning=[false] 08)--------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c1, c2, c9], has_header=true query IIII @@ -1554,17 +1554,17 @@ physical_plan 03)----WindowAggExec: wdw=[sum(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 11 FOLLOWING: Ok(Field { name: "sum(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 11 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int64(10)), end_bound: Following(Int64(11)), is_causal: false }, sum(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "sum(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int64(NULL)), end_bound: CurrentRow, is_causal: false }, sum(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 11 FOLLOWING: Ok(Field { name: "sum(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 11 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int64(NULL)), end_bound: Following(Int64(11)), is_causal: false }, sum(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS LAST] RANGE BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING: Ok(Field { name: "sum(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS LAST] RANGE BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: CurrentRow, end_bound: Following(Int64(NULL)), is_causal: false }] 04)------ProjectionExec: expr=[c1@0 as c1, c3@2 as c3, sum(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS FIRST, null_cases.c1 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@3 as sum(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS FIRST, null_cases.c1 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW, sum(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 11 FOLLOWING@4 as sum(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 11 FOLLOWING, sum(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@5 as sum(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW, sum(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND 11 FOLLOWING@6 as sum(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND 11 FOLLOWING, sum(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS FIRST] RANGE BETWEEN CURRENT ROW 
AND UNBOUNDED FOLLOWING@7 as sum(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS FIRST] RANGE BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING, sum(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 11 FOLLOWING@8 as sum(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 11 FOLLOWING, sum(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@9 as sum(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW, sum(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 11 FOLLOWING@10 as sum(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 11 FOLLOWING, sum(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS LAST] RANGE BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING@11 as sum(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS LAST] RANGE BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING, sum(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 11 FOLLOWING@12 as sum(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 11 FOLLOWING, sum(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@13 as sum(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW, sum(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND 11 FOLLOWING@14 as sum(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND 11 FOLLOWING, sum(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS FIRST] RANGE BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING@15 as sum(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS FIRST] RANGE BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING, sum(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS LAST, null_cases.c1 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@16 as sum(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS LAST, null_cases.c1 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW, sum(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS LAST, null_cases.c1 ASC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@17 as sum(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS LAST, null_cases.c1 ASC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW, sum(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS LAST, null_cases.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@18 as sum(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS LAST, null_cases.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW] 05)--------BoundedWindowAggExec: wdw=[sum(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS LAST, null_cases.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "sum(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS LAST, null_cases.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int64(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted] -06)----------SortExec: expr=[c3@2 ASC NULLS LAST,c2@1 ASC NULLS LAST], preserve_partitioning=[false] +06)----------SortExec: expr=[c3@2 ASC NULLS LAST, c2@1 ASC NULLS LAST], preserve_partitioning=[false] 
07)------------BoundedWindowAggExec: wdw=[sum(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS LAST, null_cases.c1 ASC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "sum(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS LAST, null_cases.c1 ASC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int64(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted] -08)--------------SortExec: expr=[c3@2 ASC NULLS LAST,c1@0 ASC], preserve_partitioning=[false] +08)--------------SortExec: expr=[c3@2 ASC NULLS LAST, c1@0 ASC], preserve_partitioning=[false] 09)----------------BoundedWindowAggExec: wdw=[sum(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS LAST, null_cases.c1 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "sum(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS LAST, null_cases.c1 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int64(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted] -10)------------------SortExec: expr=[c3@2 ASC NULLS LAST,c1@0 DESC], preserve_partitioning=[false] +10)------------------SortExec: expr=[c3@2 ASC NULLS LAST, c1@0 DESC], preserve_partitioning=[false] 11)--------------------WindowAggExec: wdw=[sum(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 11 FOLLOWING: Ok(Field { name: "sum(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 11 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int64(11)), end_bound: Following(Int64(10)), is_causal: false }, sum(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "sum(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: CurrentRow, end_bound: Following(Int64(NULL)), is_causal: false }, sum(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND 11 FOLLOWING: Ok(Field { name: "sum(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND 11 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int64(11)), end_bound: Following(Int64(NULL)), is_causal: false }, sum(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS FIRST] RANGE BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING: Ok(Field { name: "sum(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS FIRST] RANGE BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int64(NULL)), end_bound: CurrentRow, is_causal: false }] 12)----------------------WindowAggExec: wdw=[sum(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 11 FOLLOWING: Ok(Field { name: "sum(null_cases.c1) ORDER BY [null_cases.c3 
DESC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 11 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int64(10)), end_bound: Following(Int64(11)), is_causal: false }, sum(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "sum(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int64(NULL)), end_bound: CurrentRow, is_causal: false }, sum(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 11 FOLLOWING: Ok(Field { name: "sum(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 11 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int64(NULL)), end_bound: Following(Int64(11)), is_causal: false }, sum(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS LAST] RANGE BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING: Ok(Field { name: "sum(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS LAST] RANGE BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: CurrentRow, end_bound: Following(Int64(NULL)), is_causal: false }] 13)------------------------SortExec: expr=[c3@2 DESC NULLS LAST], preserve_partitioning=[false] 14)--------------------------WindowAggExec: wdw=[sum(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 11 FOLLOWING: Ok(Field { name: "sum(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 11 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int64(10)), end_bound: Following(Int64(11)), is_causal: false }, sum(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "sum(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int64(NULL)), end_bound: CurrentRow, is_causal: false }, sum(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND 11 FOLLOWING: Ok(Field { name: "sum(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND 11 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int64(NULL)), end_bound: Following(Int64(11)), is_causal: false }, sum(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS FIRST] RANGE BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING: Ok(Field { name: "sum(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS FIRST] RANGE BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: CurrentRow, end_bound: Following(Int64(NULL)), is_causal: false }] 
15)----------------------------BoundedWindowAggExec: wdw=[sum(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS FIRST, null_cases.c1 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "sum(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS FIRST, null_cases.c1 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int64(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted] -16)------------------------------SortExec: expr=[c3@2 DESC,c1@0 ASC NULLS LAST], preserve_partitioning=[false] +16)------------------------------SortExec: expr=[c3@2 DESC, c1@0 ASC NULLS LAST], preserve_partitioning=[false] 17)--------------------------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/null_cases.csv]]}, projection=[c1, c2, c3], has_header=true query IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII @@ -1639,7 +1639,7 @@ physical_plan 02)--GlobalLimitExec: skip=0, fetch=5 03)----BoundedWindowAggExec: wdw=[sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING: Ok(Field { name: "sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(5)), is_causal: false }], mode=[Sorted] 04)------BoundedWindowAggExec: wdw=[sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c1 ASC NULLS LAST, aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING: Ok(Field { name: "sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c1 ASC NULLS LAST, aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(5)), is_causal: false }], mode=[Sorted] -05)--------SortExec: expr=[c1@0 ASC NULLS LAST,c9@1 DESC], preserve_partitioning=[false] +05)--------SortExec: expr=[c1@0 ASC NULLS LAST, c9@1 DESC], preserve_partitioning=[false] 06)----------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c1, c9], has_header=true @@ -1683,7 +1683,7 @@ physical_plan 02)--GlobalLimitExec: skip=0, fetch=5 03)----BoundedWindowAggExec: wdw=[sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING: Ok(Field { name: "sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(5)), end_bound: Following(UInt64(1)), is_causal: false }], mode=[Sorted] 04)------BoundedWindowAggExec: wdw=[sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING: Ok(Field { name: "sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c1] ORDER BY 
[aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(5)), is_causal: false }], mode=[Sorted] -05)--------SortExec: expr=[c1@0 ASC NULLS LAST,c9@1 DESC], preserve_partitioning=[false] +05)--------SortExec: expr=[c1@0 ASC NULLS LAST, c9@1 DESC], preserve_partitioning=[false] 06)----------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c1, c9], has_header=true query III @@ -1730,8 +1730,8 @@ physical_plan 03)----WindowAggExec: wdw=[sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c3 + aggregate_test_100.c4 ASC NULLS LAST, aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c3 + aggregate_test_100.c4 ASC NULLS LAST, aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: CurrentRow, end_bound: Following(Int16(NULL)), is_causal: false }] 04)------ProjectionExec: expr=[__common_expr_1@0 as __common_expr_1, c3@2 as c3, c9@3 as c9, sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c3 + aggregate_test_100.c4 DESC NULLS FIRST, aggregate_test_100.c9 DESC NULLS FIRST, aggregate_test_100.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@4 as sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c3 + aggregate_test_100.c4 DESC NULLS FIRST, aggregate_test_100.c9 DESC NULLS FIRST, aggregate_test_100.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW] 05)--------BoundedWindowAggExec: wdw=[sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c3 + aggregate_test_100.c4 DESC NULLS FIRST, aggregate_test_100.c9 DESC NULLS FIRST, aggregate_test_100.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c3 + aggregate_test_100.c4 DESC NULLS FIRST, aggregate_test_100.c9 DESC NULLS FIRST, aggregate_test_100.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int16(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted] -06)----------SortPreservingMergeExec: [__common_expr_1@0 DESC,c9@3 DESC,c2@1 ASC NULLS LAST] -07)------------SortExec: expr=[__common_expr_1@0 DESC,c9@3 DESC,c2@1 ASC NULLS LAST], preserve_partitioning=[true] +06)----------SortPreservingMergeExec: [__common_expr_1@0 DESC, c9@3 DESC, c2@1 ASC NULLS LAST] +07)------------SortExec: expr=[__common_expr_1@0 DESC, c9@3 DESC, c2@1 ASC NULLS LAST], preserve_partitioning=[true] 08)--------------ProjectionExec: expr=[c3@1 + c4@2 as __common_expr_1, c2@0 as c2, c3@1 as c3, c9@3 as c9] 09)----------------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1 10)------------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c2, c3, c4, c9], has_header=true @@ -1823,13 +1823,13 @@ physical_plan 01)SortPreservingMergeExec: [c3@0 ASC NULLS LAST], fetch=5 02)--ProjectionExec: expr=[c3@0 as c3, sum(aggregate_test_100.c9) ORDER BY 
[aggregate_test_100.c3 DESC NULLS FIRST, aggregate_test_100.c9 DESC NULLS FIRST, aggregate_test_100.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@2 as sum1, sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c3] ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@3 as sum2] 03)----BoundedWindowAggExec: wdw=[sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c3] ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c3] ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted] -04)------SortExec: expr=[c3@0 ASC NULLS LAST,c9@1 DESC], preserve_partitioning=[true] +04)------SortExec: expr=[c3@0 ASC NULLS LAST, c9@1 DESC], preserve_partitioning=[true] 05)--------CoalesceBatchesExec: target_batch_size=4096 06)----------RepartitionExec: partitioning=Hash([c3@0], 2), input_partitions=2 07)------------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1 08)--------------ProjectionExec: expr=[c3@1 as c3, c9@2 as c9, sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c3 DESC NULLS FIRST, aggregate_test_100.c9 DESC NULLS FIRST, aggregate_test_100.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@3 as sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c3 DESC NULLS FIRST, aggregate_test_100.c9 DESC NULLS FIRST, aggregate_test_100.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW] 09)----------------BoundedWindowAggExec: wdw=[sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c3 DESC NULLS FIRST, aggregate_test_100.c9 DESC NULLS FIRST, aggregate_test_100.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c3 DESC NULLS FIRST, aggregate_test_100.c9 DESC NULLS FIRST, aggregate_test_100.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int16(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted] -10)------------------SortExec: expr=[c3@1 DESC,c9@2 DESC,c2@0 ASC NULLS LAST], preserve_partitioning=[false] +10)------------------SortExec: expr=[c3@1 DESC, c9@2 DESC, c2@0 ASC NULLS LAST], preserve_partitioning=[false] 11)--------------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c2, c3, c9], has_header=true @@ -1991,7 +1991,7 @@ logical_plan 03)----WindowAggr: windowExpr=[[row_number() PARTITION BY [aggregate_test_100.c1] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING]] 04)------TableScan: aggregate_test_100 projection=[c1] physical_plan -01)SortPreservingMergeExec: [c1@0 ASC NULLS LAST,rn1@1 ASC NULLS LAST] +01)SortPreservingMergeExec: [c1@0 ASC NULLS LAST, rn1@1 ASC NULLS LAST] 02)--ProjectionExec: expr=[c1@0 as c1, row_number() PARTITION BY [aggregate_test_100.c1] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING@1 as rn1] 03)----BoundedWindowAggExec: wdw=[row_number() PARTITION BY [aggregate_test_100.c1] ROWS BETWEEN UNBOUNDED 
PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "row_number() PARTITION BY [aggregate_test_100.c1] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }], mode=[Sorted] 04)------SortExec: expr=[c1@0 ASC NULLS LAST], preserve_partitioning=[true] @@ -2023,7 +2023,7 @@ physical_plan 04)------SortPreservingMergeExec: [c9@1 ASC NULLS LAST] 05)--------SortExec: expr=[c9@1 ASC NULLS LAST], preserve_partitioning=[true] 06)----------BoundedWindowAggExec: wdw=[sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 3 FOLLOWING: Ok(Field { name: "sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 3 FOLLOWING", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(3)), is_causal: false }], mode=[Sorted] -07)------------SortExec: expr=[c1@0 ASC NULLS LAST,c9@1 ASC NULLS LAST], preserve_partitioning=[true] +07)------------SortExec: expr=[c1@0 ASC NULLS LAST, c9@1 ASC NULLS LAST], preserve_partitioning=[true] 08)--------------CoalesceBatchesExec: target_batch_size=4096 09)----------------RepartitionExec: partitioning=Hash([c1@0], 2), input_partitions=2 10)------------------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1 @@ -2112,7 +2112,7 @@ physical_plan 05)--------WindowAggExec: wdw=[sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c2, aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c8 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c2, aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c8 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(NULL)), is_causal: false }] 06)----------BoundedWindowAggExec: wdw=[sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c1, aggregate_test_100.c2] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING: Ok(Field { name: "sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c1, aggregate_test_100.c2] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(5)), is_causal: false }], mode=[Sorted] 07)------------WindowAggExec: wdw=[sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c1, aggregate_test_100.c2] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c8 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c1, aggregate_test_100.c2] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c8 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND UNBOUNDED 
FOLLOWING", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(NULL)), is_causal: false }] -08)--------------SortExec: expr=[c1@0 ASC NULLS LAST,c2@1 ASC NULLS LAST,c9@3 ASC NULLS LAST,c8@2 ASC NULLS LAST], preserve_partitioning=[false] +08)--------------SortExec: expr=[c1@0 ASC NULLS LAST, c2@1 ASC NULLS LAST, c9@3 ASC NULLS LAST, c8@2 ASC NULLS LAST], preserve_partitioning=[false] 09)----------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c1, c2, c8, c9], has_header=true @@ -2168,7 +2168,7 @@ physical_plan 06)----------ProjectionExec: expr=[c2@1 as c2, c8@2 as c8, c9@3 as c9, c1_alias@4 as c1_alias, sum(t1.c9) PARTITION BY [t1.c1, t1.c2] ORDER BY [t1.c9 ASC NULLS LAST, t1.c8 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING@5 as sum(t1.c9) PARTITION BY [t1.c1, t1.c2] ORDER BY [t1.c9 ASC NULLS LAST, t1.c8 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING, sum(t1.c9) PARTITION BY [t1.c1, t1.c2] ORDER BY [t1.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING@6 as sum(t1.c9) PARTITION BY [t1.c1, t1.c2] ORDER BY [t1.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING] 07)------------BoundedWindowAggExec: wdw=[sum(t1.c9) PARTITION BY [t1.c1, t1.c2] ORDER BY [t1.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING: Ok(Field { name: "sum(t1.c9) PARTITION BY [t1.c1, t1.c2] ORDER BY [t1.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(5)), is_causal: false }], mode=[Sorted] 08)--------------WindowAggExec: wdw=[sum(t1.c9) PARTITION BY [t1.c1, t1.c2] ORDER BY [t1.c9 ASC NULLS LAST, t1.c8 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "sum(t1.c9) PARTITION BY [t1.c1, t1.c2] ORDER BY [t1.c9 ASC NULLS LAST, t1.c8 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(NULL)), is_causal: false }] -09)----------------SortExec: expr=[c1@0 ASC NULLS LAST,c2@1 ASC NULLS LAST,c9@3 ASC NULLS LAST,c8@2 ASC NULLS LAST], preserve_partitioning=[false] +09)----------------SortExec: expr=[c1@0 ASC NULLS LAST, c2@1 ASC NULLS LAST, c9@3 ASC NULLS LAST, c8@2 ASC NULLS LAST], preserve_partitioning=[false] 10)------------------ProjectionExec: expr=[c1@0 as c1, c2@1 as c2, c8@2 as c8, c9@3 as c9, c1@0 as c1_alias] 11)--------------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c1, c2, c8, c9], has_header=true @@ -2211,7 +2211,7 @@ physical_plan 04)------BoundedWindowAggExec: wdw=[sum(aggregate_test_100.c12) ORDER BY [aggregate_test_100.c1 ASC NULLS LAST] GROUPS BETWEEN 5 PRECEDING AND 3 PRECEDING: Ok(Field { name: "sum(aggregate_test_100.c12) ORDER BY [aggregate_test_100.c1 ASC NULLS LAST] GROUPS BETWEEN 5 PRECEDING AND 3 PRECEDING", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Groups, start_bound: Preceding(UInt64(5)), end_bound: Preceding(UInt64(3)), is_causal: true }], mode=[Sorted] 05)--------ProjectionExec: 
expr=[c1@0 as c1, c9@2 as c9, c12@3 as c12, sum(aggregate_test_100.c12) ORDER BY [aggregate_test_100.c1 ASC NULLS LAST, aggregate_test_100.c2 ASC NULLS LAST] GROUPS BETWEEN 1 PRECEDING AND 1 FOLLOWING@4 as sum(aggregate_test_100.c12) ORDER BY [aggregate_test_100.c1 ASC NULLS LAST, aggregate_test_100.c2 ASC NULLS LAST] GROUPS BETWEEN 1 PRECEDING AND 1 FOLLOWING] 06)----------BoundedWindowAggExec: wdw=[sum(aggregate_test_100.c12) ORDER BY [aggregate_test_100.c1 ASC NULLS LAST, aggregate_test_100.c2 ASC NULLS LAST] GROUPS BETWEEN 1 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "sum(aggregate_test_100.c12) ORDER BY [aggregate_test_100.c1 ASC NULLS LAST, aggregate_test_100.c2 ASC NULLS LAST] GROUPS BETWEEN 1 PRECEDING AND 1 FOLLOWING", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Groups, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(1)), is_causal: false }], mode=[Sorted] -07)------------SortExec: expr=[c1@0 ASC NULLS LAST,c2@1 ASC NULLS LAST], preserve_partitioning=[false] +07)------------SortExec: expr=[c1@0 ASC NULLS LAST, c2@1 ASC NULLS LAST], preserve_partitioning=[false] 08)--------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c1, c2, c9, c12], has_header=true query RR @@ -2356,7 +2356,7 @@ logical_plan 03)----WindowAggr: windowExpr=[[row_number() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]] 04)------TableScan: aggregate_test_100 projection=[c9] physical_plan -01)SortExec: TopK(fetch=5), expr=[rn1@1 ASC NULLS LAST,c9@0 ASC NULLS LAST], preserve_partitioning=[false] +01)SortExec: TopK(fetch=5), expr=[rn1@1 ASC NULLS LAST, c9@0 ASC NULLS LAST], preserve_partitioning=[false] 02)--ProjectionExec: expr=[c9@0 as c9, row_number() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@1 as rn1] 03)----BoundedWindowAggExec: wdw=[row_number() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "row_number() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted] 04)------SortExec: expr=[c9@0 DESC], preserve_partitioning=[false] @@ -3051,15 +3051,15 @@ physical_plan 01)SortExec: TopK(fetch=5), expr=[c@2 ASC NULLS LAST], preserve_partitioning=[false] 02)--ProjectionExec: expr=[a@1 as a, b@2 as b, c@3 as c, sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.b ASC NULLS LAST, annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING@9 as sum1, sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.b ASC NULLS LAST, annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 1 FOLLOWING AND 5 FOLLOWING@10 as sum2, sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.d] ORDER BY [annotated_data_finite2.a ASC NULLS LAST, annotated_data_finite2.b ASC NULLS LAST, annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING@15 as sum3, sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.d] ORDER BY 
[annotated_data_finite2.a ASC NULLS LAST, annotated_data_finite2.b ASC NULLS LAST, annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 1 PRECEDING@16 as sum4, sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING@5 as sum5, sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 5 FOLLOWING@6 as sum6, sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.b, annotated_data_finite2.a] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING@11 as sum7, sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.b, annotated_data_finite2.a] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 5 FOLLOWING@12 as sum8, sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING@7 as sum9, sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND CURRENT ROW@8 as sum10, sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.b, annotated_data_finite2.a, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING@13 as sum11, sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.b, annotated_data_finite2.a, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN CURRENT ROW AND 1 FOLLOWING@14 as sum12] 03)----BoundedWindowAggExec: wdw=[sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.d] ORDER BY [annotated_data_finite2.a ASC NULLS LAST, annotated_data_finite2.b ASC NULLS LAST, annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.d] ORDER BY [annotated_data_finite2.a ASC NULLS LAST, annotated_data_finite2.b ASC NULLS LAST, annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(2)), end_bound: Following(UInt64(1)), is_causal: false }, sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.d] ORDER BY [annotated_data_finite2.a ASC NULLS LAST, annotated_data_finite2.b ASC NULLS LAST, annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 1 PRECEDING: Ok(Field { name: "sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.d] ORDER BY [annotated_data_finite2.a ASC NULLS LAST, annotated_data_finite2.b ASC NULLS LAST, annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 1 PRECEDING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(5)), end_bound: Preceding(UInt64(1)), is_causal: true }], mode=[Sorted] -04)------SortExec: expr=[d@4 ASC NULLS LAST,a@1 ASC NULLS LAST,b@2 ASC NULLS LAST,c@3 ASC NULLS LAST], preserve_partitioning=[false] +04)------SortExec: expr=[d@4 ASC NULLS LAST, a@1 ASC 
NULLS LAST, b@2 ASC NULLS LAST, c@3 ASC NULLS LAST], preserve_partitioning=[false] 05)--------BoundedWindowAggExec: wdw=[sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.b, annotated_data_finite2.a, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.b, annotated_data_finite2.a, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(2)), end_bound: Following(UInt64(1)), is_causal: false }, sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.b, annotated_data_finite2.a, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN CURRENT ROW AND 1 FOLLOWING: Ok(Field { name: "sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.b, annotated_data_finite2.a, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN CURRENT ROW AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(1)), is_causal: false }], mode=[Sorted] -06)----------SortExec: expr=[b@2 ASC NULLS LAST,a@1 ASC NULLS LAST,d@4 ASC NULLS LAST,c@3 ASC NULLS LAST], preserve_partitioning=[false] +06)----------SortExec: expr=[b@2 ASC NULLS LAST, a@1 ASC NULLS LAST, d@4 ASC NULLS LAST, c@3 ASC NULLS LAST], preserve_partitioning=[false] 07)------------BoundedWindowAggExec: wdw=[sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.b, annotated_data_finite2.a] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.b, annotated_data_finite2.a] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(2)), end_bound: Following(UInt64(1)), is_causal: false }, sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.b, annotated_data_finite2.a] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 5 FOLLOWING: Ok(Field { name: "sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.b, annotated_data_finite2.a] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 5 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(5)), end_bound: Following(UInt64(5)), is_causal: false }], mode=[Sorted] -08)--------------SortExec: expr=[b@2 ASC NULLS LAST,a@1 ASC NULLS LAST,c@3 ASC NULLS LAST], preserve_partitioning=[false] +08)--------------SortExec: expr=[b@2 ASC NULLS LAST, a@1 ASC NULLS LAST, c@3 ASC NULLS LAST], preserve_partitioning=[false] 09)----------------BoundedWindowAggExec: wdw=[sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.b ASC NULLS LAST, annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING: Ok(Field { name: 
"sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.b ASC NULLS LAST, annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(2)), end_bound: Following(UInt64(1)), is_causal: false }, sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.b ASC NULLS LAST, annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 1 FOLLOWING AND 5 FOLLOWING: Ok(Field { name: "sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.b ASC NULLS LAST, annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 1 FOLLOWING AND 5 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Following(UInt64(1)), end_bound: Following(UInt64(5)), is_causal: false }], mode=[Sorted] -10)------------------SortExec: expr=[a@1 ASC NULLS LAST,d@4 ASC NULLS LAST,b@2 ASC NULLS LAST,c@3 ASC NULLS LAST], preserve_partitioning=[false] +10)------------------SortExec: expr=[a@1 ASC NULLS LAST, d@4 ASC NULLS LAST, b@2 ASC NULLS LAST, c@3 ASC NULLS LAST], preserve_partitioning=[false] 11)--------------------BoundedWindowAggExec: wdw=[sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(2)), end_bound: Following(UInt64(1)), is_causal: false }, sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND CURRENT ROW: Ok(Field { name: "sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND CURRENT ROW", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(5)), end_bound: CurrentRow, is_causal: true }], mode=[Sorted] -12)----------------------SortExec: expr=[a@1 ASC NULLS LAST,b@2 ASC NULLS LAST,d@4 ASC NULLS LAST,c@3 ASC NULLS LAST], preserve_partitioning=[false] +12)----------------------SortExec: expr=[a@1 ASC NULLS LAST, b@2 ASC NULLS LAST, d@4 ASC NULLS LAST, c@3 ASC NULLS LAST], preserve_partitioning=[false] 13)------------------------BoundedWindowAggExec: wdw=[sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 
FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(2)), end_bound: Following(UInt64(1)), is_causal: false }, sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 5 FOLLOWING: Ok(Field { name: "sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 5 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(5)), end_bound: Following(UInt64(5)), is_causal: false }], mode=[Sorted] 14)--------------------------ProjectionExec: expr=[CAST(c@2 AS Int64) as __common_expr_1, a@0 as a, b@1 as b, c@2 as c, d@3 as d] 15)----------------------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a, b, c, d], output_ordering=[a@0 ASC NULLS LAST, b@1 ASC NULLS LAST, c@2 ASC NULLS LAST], has_header=true @@ -3144,7 +3144,7 @@ logical_plan 03)----WindowAggr: windowExpr=[[sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]] 04)------TableScan: aggregate_test_100 projection=[c9] physical_plan -01)SortExec: TopK(fetch=5), expr=[sum1@1 ASC NULLS LAST,c9@0 DESC], preserve_partitioning=[false] +01)SortExec: TopK(fetch=5), expr=[sum1@1 ASC NULLS LAST, c9@0 DESC], preserve_partitioning=[false] 02)--ProjectionExec: expr=[c9@0 as c9, sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@1 as sum1] 03)----BoundedWindowAggExec: wdw=[sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted] 04)------SortExec: expr=[c9@0 DESC], preserve_partitioning=[false] @@ -3264,17 +3264,17 @@ physical_plan 01)ProjectionExec: expr=[sum(annotated_data_infinite2.a) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.b] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@3 as sum1, sum(annotated_data_infinite2.a) PARTITION BY [annotated_data_infinite2.b, annotated_data_infinite2.a] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@5 as sum2, sum(annotated_data_infinite2.a) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.b ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@4 as sum3, sum(annotated_data_infinite2.a) PARTITION BY [annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@6 as sum4] 02)--BoundedWindowAggExec: wdw=[sum(annotated_data_infinite2.a) PARTITION BY [annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT 
ROW: Ok(Field { name: "sum(annotated_data_infinite2.a) PARTITION BY [annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Linear] 03)----CoalesceBatchesExec: target_batch_size=4096 -04)------RepartitionExec: partitioning=Hash([d@2], 2), input_partitions=2, preserve_order=true, sort_exprs=__common_expr_1@0 ASC NULLS LAST,a@1 ASC NULLS LAST +04)------RepartitionExec: partitioning=Hash([d@2], 2), input_partitions=2, preserve_order=true, sort_exprs=__common_expr_1@0 ASC NULLS LAST, a@1 ASC NULLS LAST 05)--------ProjectionExec: expr=[__common_expr_1@0 as __common_expr_1, a@1 as a, d@4 as d, sum(annotated_data_infinite2.a) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.b] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@5 as sum(annotated_data_infinite2.a) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.b] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW, sum(annotated_data_infinite2.a) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.b ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@6 as sum(annotated_data_infinite2.a) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.b ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW, sum(annotated_data_infinite2.a) PARTITION BY [annotated_data_infinite2.b, annotated_data_infinite2.a] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@7 as sum(annotated_data_infinite2.a) PARTITION BY [annotated_data_infinite2.b, annotated_data_infinite2.a] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW] 06)----------BoundedWindowAggExec: wdw=[sum(annotated_data_infinite2.a) PARTITION BY [annotated_data_infinite2.b, annotated_data_infinite2.a] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "sum(annotated_data_infinite2.a) PARTITION BY [annotated_data_infinite2.b, annotated_data_infinite2.a] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted] 07)------------CoalesceBatchesExec: target_batch_size=4096 -08)--------------RepartitionExec: partitioning=Hash([b@2, a@1], 2), input_partitions=2, preserve_order=true, sort_exprs=a@1 ASC NULLS LAST,b@2 ASC NULLS LAST,c@3 ASC NULLS LAST,__common_expr_1@0 ASC NULLS LAST +08)--------------RepartitionExec: partitioning=Hash([b@2, a@1], 2), input_partitions=2, preserve_order=true, sort_exprs=a@1 ASC NULLS LAST, b@2 ASC NULLS LAST, c@3 ASC NULLS LAST, __common_expr_1@0 ASC NULLS LAST 09)----------------BoundedWindowAggExec: wdw=[sum(annotated_data_infinite2.a) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.b ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: 
Ok(Field { name: "sum(annotated_data_infinite2.a) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.b ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[PartiallySorted([0])] 10)------------------CoalesceBatchesExec: target_batch_size=4096 -11)--------------------RepartitionExec: partitioning=Hash([a@1, d@4], 2), input_partitions=2, preserve_order=true, sort_exprs=a@1 ASC NULLS LAST,b@2 ASC NULLS LAST,c@3 ASC NULLS LAST,__common_expr_1@0 ASC NULLS LAST +11)--------------------RepartitionExec: partitioning=Hash([a@1, d@4], 2), input_partitions=2, preserve_order=true, sort_exprs=a@1 ASC NULLS LAST, b@2 ASC NULLS LAST, c@3 ASC NULLS LAST, __common_expr_1@0 ASC NULLS LAST 12)----------------------BoundedWindowAggExec: wdw=[sum(annotated_data_infinite2.a) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.b] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "sum(annotated_data_infinite2.a) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.b] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted] 13)------------------------CoalesceBatchesExec: target_batch_size=4096 -14)--------------------------RepartitionExec: partitioning=Hash([a@1, b@2], 2), input_partitions=2, preserve_order=true, sort_exprs=a@1 ASC NULLS LAST,b@2 ASC NULLS LAST,c@3 ASC NULLS LAST,__common_expr_1@0 ASC NULLS LAST +14)--------------------------RepartitionExec: partitioning=Hash([a@1, b@2], 2), input_partitions=2, preserve_order=true, sort_exprs=a@1 ASC NULLS LAST, b@2 ASC NULLS LAST, c@3 ASC NULLS LAST, __common_expr_1@0 ASC NULLS LAST 15)----------------------------ProjectionExec: expr=[CAST(a@0 AS Int64) as __common_expr_1, a@0 as a, b@1 as b, c@2 as c, d@3 as d] 16)------------------------------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1 17)--------------------------------StreamingTableExec: partition_sizes=1, projection=[a, b, c, d], infinite_source=true, output_ordering=[a@0 ASC NULLS LAST, b@1 ASC NULLS LAST, c@2 ASC NULLS LAST] @@ -3624,7 +3624,7 @@ physical_plan 02)--ProjectionExec: expr=[a0@0 as a0, a@1 as a, b@2 as b, c@3 as c, d@4 as d, avg(multiple_ordered_table_inf.d) PARTITION BY [multiple_ordered_table_inf.d] ORDER BY [multiple_ordered_table_inf.a ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND CURRENT ROW@5 as avg_d] 03)----BoundedWindowAggExec: wdw=[avg(multiple_ordered_table_inf.d) PARTITION BY [multiple_ordered_table_inf.d] ORDER BY [multiple_ordered_table_inf.a ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND CURRENT ROW: Ok(Field { name: "avg(multiple_ordered_table_inf.d) PARTITION BY [multiple_ordered_table_inf.d] ORDER BY [multiple_ordered_table_inf.a ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND CURRENT ROW", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(10)), end_bound: CurrentRow, is_causal: false }], mode=[Linear] 
04)------CoalesceBatchesExec: target_batch_size=4096 -05)--------RepartitionExec: partitioning=Hash([d@4], 2), input_partitions=2, preserve_order=true, sort_exprs=a@1 ASC NULLS LAST,b@2 ASC NULLS LAST,c@3 ASC NULLS LAST +05)--------RepartitionExec: partitioning=Hash([d@4], 2), input_partitions=2, preserve_order=true, sort_exprs=a@1 ASC NULLS LAST, b@2 ASC NULLS LAST, c@3 ASC NULLS LAST 06)----------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1 07)------------StreamingTableExec: partition_sizes=1, projection=[a0, a, b, c, d], infinite_source=true, output_orderings=[[a@1 ASC NULLS LAST, b@2 ASC NULLS LAST], [c@3 ASC NULLS LAST]] From 24d953e19145e5202fd57973a9cd62e7ec96795b Mon Sep 17 00:00:00 2001 From: Jonathan Chen Date: Fri, 1 Nov 2024 20:17:31 -0400 Subject: [PATCH 30/32] fix: array_resize null fix (#13209) * array_resize null fix * comment * clippy * fixes --- datafusion/functions-nested/src/resize.rs | 36 ++++++++++++-- datafusion/sqllogictest/test_files/array.slt | 49 +++++++++++++++++++- 2 files changed, 80 insertions(+), 5 deletions(-) diff --git a/datafusion/functions-nested/src/resize.rs b/datafusion/functions-nested/src/resize.rs index 294076a52b52..b0255e7be2a3 100644 --- a/datafusion/functions-nested/src/resize.rs +++ b/datafusion/functions-nested/src/resize.rs @@ -19,8 +19,10 @@ use crate::utils::make_scalar_function; use arrow::array::{Capacities, MutableArrayData}; -use arrow_array::{ArrayRef, GenericListArray, Int64Array, OffsetSizeTrait}; -use arrow_buffer::{ArrowNativeType, OffsetBuffer}; +use arrow_array::{ + new_null_array, Array, ArrayRef, GenericListArray, Int64Array, OffsetSizeTrait, +}; +use arrow_buffer::{ArrowNativeType, BooleanBufferBuilder, NullBuffer, OffsetBuffer}; use arrow_schema::DataType::{FixedSizeList, LargeList, List}; use arrow_schema::{DataType, FieldRef}; use datafusion_common::cast::{as_int64_array, as_large_list_array, as_list_array}; @@ -134,6 +136,23 @@ pub(crate) fn array_resize_inner(arg: &[ArrayRef]) -> Result { return exec_err!("array_resize needs two or three arguments"); } + let array = &arg[0]; + + // Checks if entire array is null + if array.null_count() == array.len() { + let return_type = match array.data_type() { + List(field) => List(Arc::clone(field)), + LargeList(field) => LargeList(Arc::clone(field)), + _ => { + return exec_err!( + "array_resize does not support type '{:?}'.", + array.data_type() + ) + } + }; + return Ok(new_null_array(&return_type, array.len())); + } + let new_len = as_int64_array(&arg[1])?; let new_element = if arg.len() == 3 { Some(Arc::clone(&arg[2])) @@ -184,7 +203,16 @@ fn general_list_resize>( capacity, ); + let mut null_builder = BooleanBufferBuilder::new(array.len()); + for (row_index, offset_window) in array.offsets().windows(2).enumerate() { + if array.is_null(row_index) { + null_builder.append(false); + offsets.push(offsets[row_index]); + continue; + } + null_builder.append(true); + let count = count_array.value(row_index).to_usize().ok_or_else(|| { internal_datafusion_err!("array_resize: failed to convert size to usize") })?; @@ -211,10 +239,12 @@ fn general_list_resize>( } let data = mutable.freeze(); + let null_bit_buffer: NullBuffer = null_builder.finish().into(); + Ok(Arc::new(GenericListArray::::try_new( Arc::clone(field), OffsetBuffer::::new(offsets.into()), arrow_array::make_array(data), - None, + Some(null_bit_buffer), )?)) } diff --git a/datafusion/sqllogictest/test_files/array.slt b/datafusion/sqllogictest/test_files/array.slt index bfdbfb1bcc5e..1e60699a1f65 100644 
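The resize.rs hunks above propagate input NULLs by building a validity bitmap row by row next to the list offsets. A minimal, self-contained sketch of that mechanism using the same arrow-rs building blocks (the data, field name, and row layout here are made up for illustration; this is not the DataFusion kernel itself):

use std::sync::Arc;

use arrow_array::{Array, Int64Array, ListArray};
use arrow_buffer::{BooleanBufferBuilder, NullBuffer, OffsetBuffer};
use arrow_schema::{DataType, Field};

fn main() {
    // Child values shared by all rows of the list array.
    let values = Int64Array::from(vec![1_i64, 2, 3]);
    // Row 0 -> [1, 2], row 1 -> NULL (empty slice), row 2 -> [3]
    let offsets = OffsetBuffer::new(vec![0_i32, 2, 2, 3].into());

    // Validity bitmap built row by row, mirroring the approach in general_list_resize.
    let mut validity = BooleanBufferBuilder::new(3);
    validity.append(true);
    validity.append(false); // row 1 is NULL
    validity.append(true);
    let nulls: NullBuffer = validity.finish().into();

    let field = Arc::new(Field::new("item", DataType::Int64, true));
    let list =
        ListArray::try_new(field, offsets, Arc::new(values), Some(nulls)).unwrap();

    assert_eq!(list.len(), 3);
    assert!(list.is_null(1));
}
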
--- a/datafusion/sqllogictest/test_files/array.slt +++ b/datafusion/sqllogictest/test_files/array.slt @@ -6985,7 +6985,7 @@ select array_resize(column1, column2, column3) from arrays_values; [11, 12, 13, 14, 15, 16, 17, 18, , 20, 2, 2] [21, 22, 23, , 25, 26, 27, 28, 29, 30, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3] [31, 32, 33, 34, 35, , 37, 38, 39, 40, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4] -[5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5] +NULL [] [51, 52, , 54, 55, 56, 57, 58, 59, 60, , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , ] [61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7] @@ -6997,7 +6997,7 @@ select array_resize(arrow_cast(column1, 'LargeList(Int64)'), column2, column3) f [11, 12, 13, 14, 15, 16, 17, 18, , 20, 2, 2] [21, 22, 23, , 25, 26, 27, 28, 29, 30, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3] [31, 32, 33, 34, 35, , 37, 38, 39, 40, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4] -[5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5] +NULL [] [51, 52, , 54, 55, 56, 57, 58, 59, 60, , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , ] [61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7] @@ -7013,6 +7013,51 @@ select array_resize(arrow_cast([[1], [2], [3]], 'LargeList(List(Int64))'), 10, [ ---- [[1], [2], [3], [5], [5], [5], [5], [5], [5], [5]] +# array_resize null value +query ? +select array_resize(arrow_cast(NULL, 'List(Int8)'), 1); +---- +NULL + +statement ok +CREATE TABLE array_resize_values +AS VALUES + (make_array(1, NULL, 3, 4, 5, 6, 7, 8, 9, 10), 2, 1), + (make_array(11, 12, NULL, 14, 15, 16, 17, 18, 19, 20), 5, 2), + (make_array(21, 22, 23, 24, NULL, 26, 27, 28, 29, 30), 8, 3), + (make_array(31, 32, 33, 34, 35, 36, NULL, 38, 39, 40), 12, 4), + (NULL, 3, 0), + (make_array(41, 42, 43, 44, 45, 46, 47, 48, 49, 50), NULL, 6), + (make_array(51, 52, 53, 54, 55, NULL, 57, 58, 59, 60), 13, NULL), + (make_array(61, 62, 63, 64, 65, 66, 67, 68, 69, 70), 15, 7) +; + +# array_resize columnar test #1 +query ? +select array_resize(column1, column2, column3) from array_resize_values; +---- +[1, ] +[11, 12, , 14, 15] +[21, 22, 23, 24, , 26, 27, 28] +[31, 32, 33, 34, 35, 36, , 38, 39, 40, 4, 4] +NULL +[] +[51, 52, 53, 54, 55, , 57, 58, 59, 60, , , ] +[61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 7, 7, 7, 7, 7] + +# array_resize columnar test #2 +query ? +select array_resize(arrow_cast(column1, 'LargeList(Int64)'), column2, column3) from array_resize_values; +---- +[1, ] +[11, 12, , 14, 15] +[21, 22, 23, 24, , 26, 27, 28] +[31, 32, 33, 34, 35, 36, , 38, 39, 40, 4, 4] +NULL +[] +[51, 52, 53, 54, 55, , 57, 58, 59, 60, , , ] +[61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 7, 7, 7, 7, 7] + ## array_reverse query ?? 
select array_reverse(make_array(1, 2, 3)), array_reverse(make_array(1)); From 344f0897d105cdd588d7a7ee8cef0646f844937a Mon Sep 17 00:00:00 2001 From: Emil Ejbyfeldt Date: Sat, 2 Nov 2024 11:22:21 +0100 Subject: [PATCH 31/32] Revert "Improve push down filter of join (#13184)" (#13229) This reverts commit 7ae1ccb4971474d376e5bed1d7116fcaf23f906f. --- datafusion/optimizer/src/push_down_filter.rs | 337 ++-------- datafusion/optimizer/src/utils.rs | 8 - .../join_disable_repartition_joins.slt | 23 +- .../test_files/push_down_filter_join.slt | 612 ------------------ 4 files changed, 71 insertions(+), 909 deletions(-) delete mode 100644 datafusion/sqllogictest/test_files/push_down_filter_join.slt diff --git a/datafusion/optimizer/src/push_down_filter.rs b/datafusion/optimizer/src/push_down_filter.rs index 269ce2910074..acb7ba0fa757 100644 --- a/datafusion/optimizer/src/push_down_filter.rs +++ b/datafusion/optimizer/src/push_down_filter.rs @@ -36,9 +36,7 @@ use datafusion_expr::{ }; use crate::optimizer::ApplyOrder; -use crate::utils::{ - contain_all_columns, has_all_column_refs, is_restrict_null_predicate, -}; +use crate::utils::{has_all_column_refs, is_restrict_null_predicate}; use crate::{OptimizerConfig, OptimizerRule}; /// Optimizer rule for pushing (moving) filter expressions down in a plan so @@ -217,19 +215,19 @@ impl<'a> ColumnChecker<'a> { } /// Return true if the expression references only columns from the left side of the join - fn left_only(&mut self, column_refs: &HashSet<&Column>) -> bool { + fn is_left_only(&mut self, predicate: &Expr) -> bool { if self.left_columns.is_none() { self.left_columns = Some(schema_columns(self.left_schema)); } - contain_all_columns(column_refs, self.left_columns.as_ref().unwrap()) + has_all_column_refs(predicate, self.left_columns.as_ref().unwrap()) } /// Return true if the expression references only columns from the right side of the join - fn right_only(&mut self, column_refs: &HashSet<&Column>) -> bool { + fn is_right_only(&mut self, predicate: &Expr) -> bool { if self.right_columns.is_none() { self.right_columns = Some(schema_columns(self.right_schema)); } - contain_all_columns(column_refs, self.right_columns.as_ref().unwrap()) + has_all_column_refs(predicate, self.right_columns.as_ref().unwrap()) } } @@ -413,13 +411,10 @@ fn extract_or_clause(expr: &Expr, schema_columns: &HashSet) -> Option, - inferred_from_predicates: Vec, + inferred_join_predicates: Vec, mut join: Join, on_filter: Vec, - inferred_from_on_filter: Vec, ) -> Result> { - assert_ne!(join.join_type, JoinType::Full); - let is_inner_join = join.join_type == JoinType::Inner; // Get pushable predicates from current optimizer state let (left_preserved, right_preserved) = lr_is_preserved(join.join_type); @@ -435,24 +430,14 @@ fn push_down_all_join( let mut keep_predicates = vec![]; let mut join_conditions = vec![]; let mut checker = ColumnChecker::new(left_schema, right_schema); - for predicate in predicates { - let columns = predicate.column_refs(); - macro_rules! 
restrict_null { - () => {{ - let predicate_cloned = predicate.clone(); - let cols = columns.iter().cloned(); - is_restrict_null_predicate(predicate_cloned, cols).unwrap_or(false) - }}; - } - - if checker.left_only(&columns) && (left_preserved || restrict_null!()) { + if left_preserved && checker.is_left_only(&predicate) { left_push.push(predicate); - } else if checker.right_only(&columns) && (right_preserved || restrict_null!()) { + } else if right_preserved && checker.is_right_only(&predicate) { right_push.push(predicate); } else if is_inner_join && can_evaluate_as_join_condition(&predicate)? { - // Here we do not differ it is eq or non-eq predicate, ExtractEquijoinPredicate will - // extract the eq predicate and convert to the join on condition + // Here we do not differ it is eq or non-eq predicate, ExtractEquijoinPredicate will extract the eq predicate + // and convert to the join on condition join_conditions.push(predicate); } else { keep_predicates.push(predicate); @@ -460,13 +445,10 @@ fn push_down_all_join( } // For infer predicates, if they can not push through join, just drop them - // Because we check whether it is_restrict_null in the process of Infer, there is no need to - // check again - for predicate in inferred_from_predicates { - let columns = predicate.column_refs(); - if checker.left_only(&columns) { + for predicate in inferred_join_predicates { + if left_preserved && checker.is_left_only(&predicate) { left_push.push(predicate); - } else if checker.right_only(&columns) { + } else if right_preserved && checker.is_right_only(&predicate) { right_push.push(predicate); } } @@ -474,24 +456,15 @@ fn push_down_all_join( let mut on_filter_join_conditions = vec![]; let (on_left_preserved, on_right_preserved) = on_lr_is_preserved(join.join_type); - for on in on_filter { - let columns = on.column_refs(); - if on_left_preserved && checker.left_only(&columns) { - left_push.push(on) - } else if on_right_preserved && checker.right_only(&columns) { - right_push.push(on) - } else { - on_filter_join_conditions.push(on) - } - } - - // For infer predicates, if they can not push through join, just drop them - for on in inferred_from_on_filter { - let columns = on.column_refs(); - if on_left_preserved && checker.left_only(&columns) { - left_push.push(on) - } else if on_right_preserved && checker.right_only(&columns) { - right_push.push(on) + if !on_filter.is_empty() { + for on in on_filter { + if on_left_preserved && checker.is_left_only(&on) { + left_push.push(on) + } else if on_right_preserved && checker.is_right_only(&on) { + right_push.push(on) + } else { + on_filter_join_conditions.push(on) + } } } @@ -547,17 +520,6 @@ fn push_down_join( join: Join, parent_predicate: Option<&Expr>, ) -> Result> { - if matches!(join.join_type, JoinType::Full) { - let plan = LogicalPlan::Join(join); - return Ok(match parent_predicate { - Some(predicate) => Transformed::yes(LogicalPlan::Filter(Filter::try_new( - predicate.clone(), - Arc::new(plan), - )?)), - None => Transformed::no(plan), - }); - } - // Split the parent predicate into individual conjunctive parts. let predicates = parent_predicate .map_or_else(Vec::new, |pred| split_conjunction_owned(pred.clone())); @@ -569,24 +531,17 @@ fn push_down_join( .map_or_else(Vec::new, |filter| split_conjunction_owned(filter.clone())); // Are there any new join predicates that can be inferred from the filter expressions? 
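The question in the comment above is what infer_join_predicates answers: an equi-join key pair lets a single-column filter on one side be mirrored onto the other side. A toy sketch of that idea, using plain strings instead of the optimizer's Expr and Column types (the function name and column names are illustrative only):

// Given equi-join ON keys and the column a filter references, return the
// columns on the opposite side of each matching key pair.
fn mirror_columns<'a>(on_keys: &[(&'a str, &'a str)], filter_col: &str) -> Vec<&'a str> {
    on_keys
        .iter()
        .filter_map(|&(l, r)| {
            if l == filter_col {
                Some(r)
            } else if r == filter_col {
                Some(l)
            } else {
                None
            }
        })
        .collect()
}

fn main() {
    // ON test1.a = test2.a plus a filter on test2.a lets the optimizer also
    // propose the same filter on test1.a.
    let inferred = mirror_columns(&[("test1.a", "test2.a")], "test2.a");
    assert_eq!(inferred, vec!["test1.a"]);
}
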
- let (inferred_from_predicates, inferred_from_on_filter) = + let inferred_join_predicates = infer_join_predicates(&join, &predicates, &on_filters)?; if on_filters.is_empty() - && inferred_from_on_filter.is_empty() && predicates.is_empty() - && inferred_from_predicates.is_empty() + && inferred_join_predicates.is_empty() { return Ok(Transformed::no(LogicalPlan::Join(join))); } - push_down_all_join( - predicates, - inferred_from_predicates, - join, - on_filters, - inferred_from_on_filter, - ) + push_down_all_join(predicates, inferred_join_predicates, join, on_filters) } /// Extracts any equi-join join predicates from the given filter expressions. @@ -599,13 +554,11 @@ fn push_down_join( /// * `on_filters` filters from the join ON clause that have not already been /// identified as join predicates /// -/// # Return Value -/// A tuple of Expr Vec - (inferred_from_predicates, inferred_from_on_filters). fn infer_join_predicates( join: &Join, predicates: &[Expr], on_filters: &[Expr], -) -> Result<(Vec, Vec)> { +) -> Result> { // Only allow both side key is column. let join_col_keys = join .on @@ -626,7 +579,6 @@ fn infer_join_predicates( predicates, &mut inferred_predicates, )?; - let inferred_from_predicates = inferred_predicates.take_all(); infer_join_predicates_from_on_filters( &join_col_keys, @@ -634,9 +586,8 @@ fn infer_join_predicates( on_filters, &mut inferred_predicates, )?; - let inferred_from_on_filters = inferred_predicates.predicates; - Ok((inferred_from_predicates, inferred_from_on_filters)) + Ok(inferred_predicates.predicates) } /// Inferred predicates collector. @@ -660,12 +611,6 @@ impl InferredPredicates { } } - fn take_all(&mut self) -> Vec { - let mut temp = vec![]; - std::mem::swap(&mut self.predicates, &mut temp); - temp - } - fn try_build_predicate( &mut self, predicate: Expr, @@ -2165,10 +2110,11 @@ mod tests { // filter not duplicated nor pushed down - i.e. noop let expected = "\ - Left Join: Using test.a = test2.a\ - \n TableScan: test, full_filters=[test.a <= Int64(1)]\ - \n Projection: test2.a\ - \n TableScan: test2, full_filters=[test2.a <= Int64(1)]"; + Filter: test2.a <= Int64(1)\ + \n Left Join: Using test.a = test2.a\ + \n TableScan: test, full_filters=[test.a <= Int64(1)]\ + \n Projection: test2.a\ + \n TableScan: test2"; assert_optimized_plan_eq(plan, expected) } @@ -2202,10 +2148,11 @@ mod tests { // filter not duplicated nor pushed down - i.e. 
noop let expected = "\ - Right Join: Using test.a = test2.a\ - \n TableScan: test, full_filters=[test.a <= Int64(1)]\ - \n Projection: test2.a\ - \n TableScan: test2, full_filters=[test2.a <= Int64(1)]"; + Filter: test.a <= Int64(1)\ + \n Right Join: Using test.a = test2.a\ + \n TableScan: test\ + \n Projection: test2.a\ + \n TableScan: test2, full_filters=[test2.a <= Int64(1)]"; assert_optimized_plan_eq(plan, expected) } @@ -2243,7 +2190,7 @@ mod tests { Left Join: Using test.a = test2.a\ \n TableScan: test, full_filters=[test.a <= Int64(1)]\ \n Projection: test2.a\ - \n TableScan: test2, full_filters=[test2.a <= Int64(1)]"; + \n TableScan: test2"; assert_optimized_plan_eq(plan, expected) } @@ -2279,7 +2226,7 @@ mod tests { // filter sent to right side of join, not duplicated to the left let expected = "\ Right Join: Using test.a = test2.a\ - \n TableScan: test, full_filters=[test.a <= Int64(1)]\ + \n TableScan: test\ \n Projection: test2.a\ \n TableScan: test2, full_filters=[test2.a <= Int64(1)]"; assert_optimized_plan_eq(plan, expected) @@ -2328,47 +2275,6 @@ mod tests { assert_optimized_plan_eq(plan, expected) } - #[test] - fn join_with_non_restrict_null_predicate() -> Result<()> { - let table_scan = test_table_scan()?; - let left = LogicalPlanBuilder::from(table_scan) - .project(vec![col("a"), col("b"), col("c")])? - .build()?; - let right_table_scan = test_table_scan_with_name("test2")?; - let right = LogicalPlanBuilder::from(right_table_scan) - .project(vec![col("a"), col("b"), col("c")])? - .build()?; - let filter = col("test.b").is_null().and(col("test2.b").is_null()); - let plan = LogicalPlanBuilder::from(left) - .join( - right, - JoinType::Inner, - (vec![Column::from_name("a")], vec![Column::from_name("a")]), - None, - )? - .filter(filter)? - .build()?; - - // not part of the test, just good to know: - assert_eq!( - format!("{plan}"), - "Filter: test.b IS NULL AND test2.b IS NULL\ - \n Inner Join: test.a = test2.a\ - \n Projection: test.a, test.b, test.c\ - \n TableScan: test\ - \n Projection: test2.a, test2.b, test2.c\ - \n TableScan: test2" - ); - - let expected = "\ - Inner Join: test.a = test2.a\ - \n Projection: test.a, test.b, test.c\ - \n TableScan: test, full_filters=[test.b IS NULL]\ - \n Projection: test2.a, test2.b, test2.c\ - \n TableScan: test2, full_filters=[test2.b IS NULL]"; - assert_optimized_plan_eq(plan, expected) - } - /// join filter should be completely removed after pushdown #[test] fn join_filter_removed() -> Result<()> { @@ -2490,49 +2396,7 @@ mod tests { \n Projection: test.a, test.b, test.c\ \n TableScan: test\ \n Projection: test2.a, test2.b, test2.c\ - \n TableScan: test2, full_filters=[test2.c > UInt32(4), test2.a > UInt32(1)]"; - assert_optimized_plan_eq(plan, expected) - } - - #[test] - fn left_join_with_non_restrict_null_predicate() -> Result<()> { - let table_scan = test_table_scan()?; - let left = LogicalPlanBuilder::from(table_scan) - .project(vec![col("a"), col("b"), col("c")])? - .build()?; - let right_table_scan = test_table_scan_with_name("test2")?; - let right = LogicalPlanBuilder::from(right_table_scan) - .project(vec![col("a"), col("b"), col("c")])? - .build()?; - let filter = col("test.b").is_null().and(col("test2.b").is_null()); - let plan = LogicalPlanBuilder::from(left) - .join( - right, - JoinType::Left, - (vec![Column::from_name("a")], vec![Column::from_name("a")]), - None, - )? - .filter(filter)? 
- .build()?; - - // not part of the test, just good to know: - assert_eq!( - format!("{plan}"), - "Filter: test.b IS NULL AND test2.b IS NULL\ - \n Left Join: test.a = test2.a\ - \n Projection: test.a, test.b, test.c\ - \n TableScan: test\ - \n Projection: test2.a, test2.b, test2.c\ - \n TableScan: test2" - ); - - let expected = "\ - Filter: test2.b IS NULL\ - \n Left Join: test.a = test2.a\ - \n Projection: test.a, test.b, test.c\ - \n TableScan: test, full_filters=[test.b IS NULL]\ - \n Projection: test2.a, test2.b, test2.c\ - \n TableScan: test2"; + \n TableScan: test2, full_filters=[test2.c > UInt32(4)]"; assert_optimized_plan_eq(plan, expected) } @@ -2579,87 +2443,6 @@ mod tests { assert_optimized_plan_eq(plan, expected) } - #[test] - fn right_join_with_non_restrict_null_predicate() -> Result<()> { - let table_scan = test_table_scan()?; - let left = LogicalPlanBuilder::from(table_scan) - .project(vec![col("a"), col("b"), col("c")])? - .build()?; - let right_table_scan = test_table_scan_with_name("test2")?; - let right = LogicalPlanBuilder::from(right_table_scan) - .project(vec![col("a"), col("b"), col("c")])? - .build()?; - let filter = col("test.b").is_null().and(col("test2.b").is_null()); - let plan = LogicalPlanBuilder::from(left) - .join( - right, - JoinType::Right, - (vec![Column::from_name("a")], vec![Column::from_name("a")]), - None, - )? - .filter(filter)? - .build()?; - - // not part of the test, just good to know: - assert_eq!( - format!("{plan}"), - "Filter: test.b IS NULL AND test2.b IS NULL\ - \n Right Join: test.a = test2.a\ - \n Projection: test.a, test.b, test.c\ - \n TableScan: test\ - \n Projection: test2.a, test2.b, test2.c\ - \n TableScan: test2" - ); - - let expected = "\ - Filter: test.b IS NULL\ - \n Right Join: test.a = test2.a\ - \n Projection: test.a, test.b, test.c\ - \n TableScan: test\ - \n Projection: test2.a, test2.b, test2.c\ - \n TableScan: test2, full_filters=[test2.b IS NULL]"; - assert_optimized_plan_eq(plan, expected) - } - - #[test] - fn full_join() -> Result<()> { - let table_scan = test_table_scan()?; - let left = LogicalPlanBuilder::from(table_scan) - .project(vec![col("a"), col("b"), col("c")])? - .build()?; - let right_table_scan = test_table_scan_with_name("test2")?; - let right = LogicalPlanBuilder::from(right_table_scan) - .project(vec![col("a"), col("b"), col("c")])? - .build()?; - let filter = col("test.a") - .gt(lit(1u32)) - .and(col("test.b").lt(col("test2.b"))) - .and(col("test2.c").gt(lit(4u32))); - let plan = LogicalPlanBuilder::from(left) - .join( - right, - JoinType::Full, - (vec![Column::from_name("a")], vec![Column::from_name("a")]), - None, - )? - .filter(filter)? - .build()?; - - // not part of the test, just good to know: - assert_eq!( - format!("{plan}"), - "Filter: test.a > UInt32(1) AND test.b < test2.b AND test2.c > UInt32(4)\ - \n Full Join: test.a = test2.a\ - \n Projection: test.a, test.b, test.c\ - \n TableScan: test\ - \n Projection: test2.a, test2.b, test2.c\ - \n TableScan: test2" - ); - - let expected = &format!("{plan}"); - assert_optimized_plan_eq(plan, expected) - } - /// single table predicate parts of ON condition should not be pushed #[test] fn full_join_on_with_filter() -> Result<()> { @@ -3183,10 +2966,11 @@ Projection: a, b // Inferred the predicate `test1.a <= Int64(1)` and push it down to the left side. 
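Whether an original or inferred predicate may actually move below the join is what the restored lr_is_preserved check decides. A simplified stand-in covering only the basic join types (the real function also handles the semi and anti joins exercised in these tests, and lives in the optimizer, not here):

enum JoinType {
    Inner,
    Left,
    Right,
    Full,
}

// Which input sides keep all of their rows un-padded, so that a WHERE
// predicate referencing only that side may be evaluated before the join.
fn lr_is_preserved(join_type: JoinType) -> (bool, bool) {
    match join_type {
        JoinType::Inner => (true, true),
        JoinType::Left => (true, false),  // right side may be NULL-padded
        JoinType::Right => (false, true), // left side may be NULL-padded
        JoinType::Full => (false, false),
    }
}

fn main() {
    assert_eq!(lr_is_preserved(JoinType::Inner), (true, true));
    // For t1 LEFT JOIN t2, a WHERE filter on t1 can be pushed below the join,
    // but a WHERE filter on t2 has to stay above it.
    assert_eq!(lr_is_preserved(JoinType::Left), (true, false));
    assert_eq!(lr_is_preserved(JoinType::Right), (false, true));
    assert_eq!(lr_is_preserved(JoinType::Full), (false, false));
}
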
let expected = "\ - LeftSemi Join: test1.a = test2.a\ - \n TableScan: test1, full_filters=[test1.a <= Int64(1)]\ - \n Projection: test2.a, test2.b\ - \n TableScan: test2, full_filters=[test2.a <= Int64(1)]"; + Filter: test2.a <= Int64(1)\ + \n LeftSemi Join: test1.a = test2.a\ + \n TableScan: test1, full_filters=[test1.a <= Int64(1)]\ + \n Projection: test2.a, test2.b\ + \n TableScan: test2"; assert_optimized_plan_eq(plan, expected) } @@ -3263,10 +3047,11 @@ Projection: a, b // Inferred the predicate `test2.a <= Int64(1)` and push it down to the right side. let expected = "\ - RightSemi Join: test1.a = test2.a\ - \n TableScan: test1, full_filters=[test1.a <= Int64(1)]\ - \n Projection: test2.a, test2.b\ - \n TableScan: test2, full_filters=[test2.a <= Int64(1)]"; + Filter: test1.a <= Int64(1)\ + \n RightSemi Join: test1.a = test2.a\ + \n TableScan: test1\ + \n Projection: test2.a, test2.b\ + \n TableScan: test2, full_filters=[test2.a <= Int64(1)]"; assert_optimized_plan_eq(plan, expected) } @@ -3347,11 +3132,12 @@ Projection: a, b // For left anti, filter of the right side filter can be pushed down. let expected = "\ - LeftAnti Join: test1.a = test2.a\ - \n Projection: test1.a, test1.b\ - \n TableScan: test1, full_filters=[test1.a > UInt32(2)]\ - \n Projection: test2.a, test2.b\ - \n TableScan: test2, full_filters=[test2.a > UInt32(2)]"; + Filter: test2.a > UInt32(2)\ + \n LeftAnti Join: test1.a = test2.a\ + \n Projection: test1.a, test1.b\ + \n TableScan: test1, full_filters=[test1.a > UInt32(2)]\ + \n Projection: test2.a, test2.b\ + \n TableScan: test2"; assert_optimized_plan_eq(plan, expected) } @@ -3437,11 +3223,12 @@ Projection: a, b // For right anti, filter of the left side can be pushed down. let expected = "\ - RightAnti Join: test1.a = test2.a\ - \n Projection: test1.a, test1.b\ - \n TableScan: test1, full_filters=[test1.a > UInt32(2)]\ - \n Projection: test2.a, test2.b\ - \n TableScan: test2, full_filters=[test2.a > UInt32(2)]"; + Filter: test1.a > UInt32(2)\ + \n RightAnti Join: test1.a = test2.a\ + \n Projection: test1.a, test1.b\ + \n TableScan: test1\ + \n Projection: test2.a, test2.b\ + \n TableScan: test2, full_filters=[test2.a > UInt32(2)]"; assert_optimized_plan_eq(plan, expected) } diff --git a/datafusion/optimizer/src/utils.rs b/datafusion/optimizer/src/utils.rs index 9f8d7b7f97a0..9f325bc01b1d 100644 --- a/datafusion/optimizer/src/utils.rs +++ b/datafusion/optimizer/src/utils.rs @@ -79,14 +79,6 @@ pub fn optimize_children( /// Returns true if `expr` contains all columns in `schema_cols` pub(crate) fn has_all_column_refs(expr: &Expr, schema_cols: &HashSet) -> bool { let column_refs = expr.column_refs(); - contain_all_columns(&column_refs, schema_cols) -} - -/// Returns true if `column_refs` contains all columns in `schema_cols` -pub(crate) fn contain_all_columns( - column_refs: &HashSet<&Column>, - schema_cols: &HashSet, -) -> bool { // note can't use HashSet::intersect because of different types (owned vs References) schema_cols .iter() diff --git a/datafusion/sqllogictest/test_files/join_disable_repartition_joins.slt b/datafusion/sqllogictest/test_files/join_disable_repartition_joins.slt index 44c5ed0d2dab..cf897d628da5 100644 --- a/datafusion/sqllogictest/test_files/join_disable_repartition_joins.slt +++ b/datafusion/sqllogictest/test_files/join_disable_repartition_joins.slt @@ -89,25 +89,20 @@ logical_plan 02)--Projection: t2.a AS a2, t2.b 03)----RightSemi Join: t1.d = t2.d, t1.c = t2.c 04)------SubqueryAlias: t1 -05)--------Filter: annotated_data.d = Int32(3) 
-06)----------TableScan: annotated_data projection=[c, d], partial_filters=[annotated_data.d = Int32(3)] -07)------SubqueryAlias: t2 -08)--------Filter: annotated_data.d = Int32(3) -09)----------TableScan: annotated_data projection=[a, b, c, d], partial_filters=[annotated_data.d = Int32(3)] +05)--------TableScan: annotated_data projection=[c, d] +06)------SubqueryAlias: t2 +07)--------Filter: annotated_data.d = Int32(3) +08)----------TableScan: annotated_data projection=[a, b, c, d], partial_filters=[annotated_data.d = Int32(3)] physical_plan 01)SortPreservingMergeExec: [a2@0 ASC NULLS LAST, b@1 ASC NULLS LAST], fetch=10 02)--ProjectionExec: expr=[a@0 as a2, b@1 as b] 03)----CoalesceBatchesExec: target_batch_size=8192, fetch=10 04)------HashJoinExec: mode=CollectLeft, join_type=RightSemi, on=[(d@1, d@3), (c@0, c@2)], projection=[a@0, b@1] -05)--------CoalescePartitionsExec -06)----------CoalesceBatchesExec: target_batch_size=8192 -07)------------FilterExec: d@1 = 3 -08)--------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 -09)----------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[c, d], has_header=true -10)--------CoalesceBatchesExec: target_batch_size=8192 -11)----------FilterExec: d@3 = 3 -12)------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 -13)--------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a, b, c, d], output_ordering=[a@0 ASC NULLS LAST, b@1 ASC NULLS LAST, c@2 ASC NULLS LAST], has_header=true +05)--------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[c, d], has_header=true +06)--------CoalesceBatchesExec: target_batch_size=8192 +07)----------FilterExec: d@3 = 3 +08)------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 +09)--------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a, b, c, d], output_ordering=[a@0 ASC NULLS LAST, b@1 ASC NULLS LAST, c@2 ASC NULLS LAST], has_header=true # preserve_right_semi_join query II nosort diff --git a/datafusion/sqllogictest/test_files/push_down_filter_join.slt b/datafusion/sqllogictest/test_files/push_down_filter_join.slt deleted file mode 100644 index f687c542a683..000000000000 --- a/datafusion/sqllogictest/test_files/push_down_filter_join.slt +++ /dev/null @@ -1,612 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at - -# http://www.apache.org/licenses/LICENSE-2.0 - -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
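The push_down_filter_join.slt tests removed below all exercise one semantic point: a WHERE predicate on the NULL-padded side of an outer join changes its meaning if it is evaluated before the join. A self-contained sketch of that effect, independent of DataFusion's code and using made-up two-row tables:

// Toy nested-loop LEFT JOIN on equality of the two columns.
fn left_join(t1: &[i32], t2: &[i32]) -> Vec<(i32, Option<i32>)> {
    t1.iter()
        .map(|&a| (a, t2.iter().copied().find(|&b| b == a)))
        .collect()
}

fn main() {
    let t1 = [11, 33];
    let t2 = [11];

    // Correct: join first, then keep rows where the t2 side IS NULL.
    let after_join: Vec<_> = left_join(&t1, &t2)
        .into_iter()
        .filter(|(_, t2_id)| t2_id.is_none())
        .collect();
    assert_eq!(after_join, vec![(33, None)]);

    // Wrong: the same `t2_id IS NULL` predicate pushed below the join removes
    // every (non-NULL) row of t2, so all t1 rows come back NULL-padded.
    let t2_pushed: Vec<i32> = t2.iter().copied().filter(|_| false).collect();
    let below_join = left_join(&t1, &t2_pushed);
    assert_eq!(below_join, vec![(11, None), (33, None)]);
}
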
- -# Contents -# Inner Join -# `WHERE` clause -# `ON` clause -# Left Outer Join -# `WHERE` clause -# `ON` clause -# Right Outer Join -# `WHERE` clause -# `ON` clause -# Full Outer Join -# `WHERE` clause -# `ON` clause - -# Create table t1 -statement ok -CREATE TABLE t1(t1_id INT, t1_name VARCHAR) AS VALUES -(11, 'a'), -(22, 'b'), -(33, 'c'), -(44, 'd'), -(77, 'e'), -(88, NULL), -(99, NULL) - -# Create table t2 -statement ok -CREATE TABLE t2(t2_id INT, t2_name VARCHAR) AS VALUES -(11, 'z'), -(22, NULL), -(44, 'x'), -(55, 'w'), -(99, 'u') - -# Inner Join - -## `WHERE` clause - -query ITIT -SELECT * FROM t1 INNER JOIN t2 ON t1.t1_id = t2.t2_id ORDER BY t1_id, t1_name, t2_id, t2_name ----- -11 a 11 z -22 b 22 NULL -44 d 44 x -99 NULL 99 u - -query ITIT -SELECT * FROM t1 INNER JOIN t2 ON t1.t1_id = t2.t2_id WHERE t1.t1_name = 'a' ORDER BY t1_id, t1_name, t2_id, t2_name ----- -11 a 11 z - -query ITIT -SELECT * FROM t1 INNER JOIN t2 ON t1.t1_id = t2.t2_id WHERE t2.t2_name = 'z' ORDER BY t1_id, t1_name, t2_id, t2_name ----- -11 a 11 z - -query ITIT -SELECT * FROM t1 INNER JOIN t2 ON t1.t1_id = t2.t2_id WHERE t1.t1_name IS NULL ORDER BY t1_id, t1_name, t2_id, t2_name ----- -99 NULL 99 u - -query ITIT -SELECT * FROM t1 INNER JOIN t2 ON t1.t1_id = t2.t2_id WHERE t2.t2_name IS NULL ORDER BY t1_id, t1_name, t2_id, t2_name ----- -22 b 22 NULL - -query ITIT -SELECT * FROM t1 INNER JOIN t2 ON t1.t1_id = t2.t2_id WHERE t1.t1_id <= 22 ORDER BY t1_id, t1_name, t2_id, t2_name ----- -11 a 11 z -22 b 22 NULL - -query ITIT -SELECT * FROM t1 INNER JOIN t2 ON t1.t1_id = t2.t2_id WHERE t2.t2_id <= 22 ORDER BY t1_id, t1_name, t2_id, t2_name ----- -11 a 11 z -22 b 22 NULL - -query ITIT -SELECT * FROM t1 INNER JOIN t2 ON t1.t1_id = t2.t2_id WHERE t1.t1_id IS NULL ORDER BY t1_id, t1_name, t2_id, t2_name ----- - -query ITIT -SELECT * FROM t1 INNER JOIN t2 ON t1.t1_id = t2.t2_id WHERE t2.t2_id IS NULL ORDER BY t1_id, t1_name, t2_id, t2_name ----- - -## `ON` clause - -query ITIT -SELECT * FROM t1 INNER JOIN t2 ON t1.t1_id = t2.t2_id AND t1.t1_name = 'a' ORDER BY t1_id, t1_name, t2_id, t2_name ----- -11 a 11 z - -query ITIT -SELECT * FROM t1 INNER JOIN t2 ON t1.t1_id = t2.t2_id AND t2.t2_name = 'z' ORDER BY t1_id, t1_name, t2_id, t2_name ----- -11 a 11 z - -query ITIT -SELECT * FROM t1 INNER JOIN t2 ON t1.t1_id = t2.t2_id AND t1.t1_name IS NULL ORDER BY t1_id, t1_name, t2_id, t2_name ----- -99 NULL 99 u - -query ITIT -SELECT * FROM t1 INNER JOIN t2 ON t1.t1_id = t2.t2_id AND t2.t2_name IS NULL ORDER BY t1_id, t1_name, t2_id, t2_name ----- -22 b 22 NULL - -query ITIT -SELECT * FROM t1 INNER JOIN t2 ON t1.t1_id = t2.t2_id AND t1.t1_name IS NULL ORDER BY t1_id, t1_name, t2_id, t2_name ----- -99 NULL 99 u - -query ITIT -SELECT * FROM t1 INNER JOIN t2 ON t1.t1_id = t2.t2_id AND t2.t2_name IS NULL ORDER BY t1_id, t1_name, t2_id, t2_name ----- -22 b 22 NULL - -query ITIT -SELECT * FROM t1 INNER JOIN t2 ON t1.t1_id = t2.t2_id AND t1.t1_id <= 22 ORDER BY t1_id, t1_name, t2_id, t2_name ----- -11 a 11 z -22 b 22 NULL - -query ITIT -SELECT * FROM t1 INNER JOIN t2 ON t1.t1_id = t2.t2_id AND t2.t2_id <= 22 ORDER BY t1_id, t1_name, t2_id, t2_name ----- -11 a 11 z -22 b 22 NULL - -# Left Outer Join - -## `WHERE` clause - -query ITIT -SELECT * FROM t1 LEFT JOIN t2 ON t1.t1_id = t2.t2_id ORDER BY t1_id, t1_name, t2_id, t2_name ----- -11 a 11 z -22 b 22 NULL -33 c NULL NULL -44 d 44 x -77 e NULL NULL -88 NULL NULL NULL -99 NULL 99 u - -query ITIT -SELECT * FROM t1 LEFT JOIN t2 ON t1.t1_id = t2.t2_id WHERE t1.t1_name = 'a' ORDER BY t1_id, 
t1_name, t2_id, t2_name ----- -11 a 11 z - -query ITIT -SELECT * FROM t1 LEFT JOIN t2 ON t1.t1_id = t2.t2_id WHERE t2.t2_name = 'z' ORDER BY t1_id, t1_name, t2_id, t2_name ----- -11 a 11 z - -query ITIT -SELECT * FROM t1 LEFT JOIN t2 ON t1.t1_id = t2.t2_id WHERE t1.t1_name IS NULL ORDER BY t1_id, t1_name, t2_id, t2_name ----- -88 NULL NULL NULL -99 NULL 99 u - -query ITIT -SELECT * FROM t1 LEFT JOIN t2 ON t1.t1_id = t2.t2_id WHERE t2.t2_name IS NULL ORDER BY t1_id, t1_name, t2_id, t2_name ----- -22 b 22 NULL -33 c NULL NULL -77 e NULL NULL -88 NULL NULL NULL - -query ITIT -SELECT * FROM t1 LEFT JOIN t2 ON t1.t1_id = t2.t2_id WHERE t1.t1_id <= 22 ORDER BY t1_id, t1_name, t2_id, t2_name ----- -11 a 11 z -22 b 22 NULL - -query ITIT -SELECT * FROM t1 LEFT JOIN t2 ON t1.t1_id = t2.t2_id WHERE t2.t2_id <= 22 ORDER BY t1_id, t1_name, t2_id, t2_name ----- -11 a 11 z -22 b 22 NULL - -query ITIT -SELECT * FROM t1 LEFT JOIN t2 ON t1.t1_id = t2.t2_id WHERE t1.t1_id IS NULL ORDER BY t1_id, t1_name, t2_id, t2_name ----- - -query ITIT -SELECT * FROM t1 LEFT JOIN t2 ON t1.t1_id = t2.t2_id WHERE t2.t2_id IS NULL ORDER BY t1_id, t1_name, t2_id, t2_name ----- -33 c NULL NULL -77 e NULL NULL -88 NULL NULL NULL - -## `ON` clause - -query ITIT -SELECT * FROM t1 LEFT JOIN t2 ON t1.t1_id = t2.t2_id AND t1.t1_name = 'a' ORDER BY t1_id, t1_name, t2_id, t2_name ----- -11 a 11 z -22 b NULL NULL -33 c NULL NULL -44 d NULL NULL -77 e NULL NULL -88 NULL NULL NULL -99 NULL NULL NULL - -query ITIT -SELECT * FROM t1 LEFT JOIN t2 ON t1.t1_id = t2.t2_id AND t2.t2_name = 'z' ORDER BY t1_id, t1_name, t2_id, t2_name ----- -11 a 11 z -22 b NULL NULL -33 c NULL NULL -44 d NULL NULL -77 e NULL NULL -88 NULL NULL NULL -99 NULL NULL NULL - -query ITIT -SELECT * FROM t1 LEFT JOIN t2 ON t1.t1_id = t2.t2_id AND t1.t1_name IS NULL ORDER BY t1_id, t1_name, t2_id, t2_name ----- -11 a NULL NULL -22 b NULL NULL -33 c NULL NULL -44 d NULL NULL -77 e NULL NULL -88 NULL NULL NULL -99 NULL 99 u - -query ITIT -SELECT * FROM t1 LEFT JOIN t2 ON t1.t1_id = t2.t2_id AND t2.t2_name IS NULL ORDER BY t1_id, t1_name, t2_id, t2_name ----- -11 a NULL NULL -22 b 22 NULL -33 c NULL NULL -44 d NULL NULL -77 e NULL NULL -88 NULL NULL NULL -99 NULL NULL NULL - -query ITIT -SELECT * FROM t1 LEFT JOIN t2 ON t1.t1_id = t2.t2_id AND t1.t1_name IS NULL ORDER BY t1_id, t1_name, t2_id, t2_name ----- -11 a NULL NULL -22 b NULL NULL -33 c NULL NULL -44 d NULL NULL -77 e NULL NULL -88 NULL NULL NULL -99 NULL 99 u - -query ITIT -SELECT * FROM t1 LEFT JOIN t2 ON t1.t1_id = t2.t2_id AND t2.t2_name IS NULL ORDER BY t1_id, t1_name, t2_id, t2_name ----- -11 a NULL NULL -22 b 22 NULL -33 c NULL NULL -44 d NULL NULL -77 e NULL NULL -88 NULL NULL NULL -99 NULL NULL NULL - -query ITIT -SELECT * FROM t1 LEFT JOIN t2 ON t1.t1_id = t2.t2_id AND t1.t1_id <= 22 ORDER BY t1_id, t1_name, t2_id, t2_name ----- -11 a 11 z -22 b 22 NULL -33 c NULL NULL -44 d NULL NULL -77 e NULL NULL -88 NULL NULL NULL -99 NULL NULL NULL - -query ITIT -SELECT * FROM t1 LEFT JOIN t2 ON t1.t1_id = t2.t2_id AND t2.t2_id <= 22 ORDER BY t1_id, t1_name, t2_id, t2_name ----- -11 a 11 z -22 b 22 NULL -33 c NULL NULL -44 d NULL NULL -77 e NULL NULL -88 NULL NULL NULL -99 NULL NULL NULL - -# Right Outer Join - -## `WHERE` clause - -query ITIT -SELECT * FROM t1 RIGHT JOIN t2 ON t1.t1_id = t2.t2_id ORDER BY t1_id, t1_name, t2_id, t2_name ----- -11 a 11 z -22 b 22 NULL -44 d 44 x -99 NULL 99 u -NULL NULL 55 w - -query ITIT -SELECT * FROM t1 RIGHT JOIN t2 ON t1.t1_id = t2.t2_id WHERE t1.t1_name = 'a' ORDER BY t1_id, 
t1_name, t2_id, t2_name ----- -11 a 11 z - -query ITIT -SELECT * FROM t1 RIGHT JOIN t2 ON t1.t1_id = t2.t2_id WHERE t2.t2_name = 'z' ORDER BY t1_id, t1_name, t2_id, t2_name ----- -11 a 11 z - -query ITIT -SELECT * FROM t1 RIGHT JOIN t2 ON t1.t1_id = t2.t2_id WHERE t1.t1_name IS NULL ORDER BY t1_id, t1_name, t2_id, t2_name ----- -99 NULL 99 u -NULL NULL 55 w - -query ITIT -SELECT * FROM t1 RIGHT JOIN t2 ON t1.t1_id = t2.t2_id WHERE t2.t2_name IS NULL ORDER BY t1_id, t1_name, t2_id, t2_name ----- -22 b 22 NULL - -query ITIT -SELECT * FROM t1 RIGHT JOIN t2 ON t1.t1_id = t2.t2_id WHERE t1.t1_id <= 22 ORDER BY t1_id, t1_name, t2_id, t2_name ----- -11 a 11 z -22 b 22 NULL - -query ITIT -SELECT * FROM t1 RIGHT JOIN t2 ON t1.t1_id = t2.t2_id WHERE t2.t2_id <= 22 ORDER BY t1_id, t1_name, t2_id, t2_name ----- -11 a 11 z -22 b 22 NULL - -query ITIT -SELECT * FROM t1 RIGHT JOIN t2 ON t1.t1_id = t2.t2_id WHERE t1.t1_id IS NULL ORDER BY t1_id, t1_name, t2_id, t2_name ----- -NULL NULL 55 w - -query ITIT -SELECT * FROM t1 RIGHT JOIN t2 ON t1.t1_id = t2.t2_id WHERE t2.t2_id IS NULL ORDER BY t1_id, t1_name, t2_id, t2_name ----- - -## `ON` clause - -query ITIT -SELECT * FROM t1 RIGHT JOIN t2 ON t1.t1_id = t2.t2_id AND t1.t1_name = 'a' ORDER BY t1_id, t1_name, t2_id, t2_name ----- -11 a 11 z -NULL NULL 22 NULL -NULL NULL 44 x -NULL NULL 55 w -NULL NULL 99 u - -query ITIT -SELECT * FROM t1 RIGHT JOIN t2 ON t1.t1_id = t2.t2_id AND t2.t2_name = 'z' ORDER BY t1_id, t1_name, t2_id, t2_name ----- -11 a 11 z -NULL NULL 22 NULL -NULL NULL 44 x -NULL NULL 55 w -NULL NULL 99 u - -query ITIT -SELECT * FROM t1 RIGHT JOIN t2 ON t1.t1_id = t2.t2_id AND t1.t1_name IS NULL ORDER BY t1_id, t1_name, t2_id, t2_name ----- -99 NULL 99 u -NULL NULL 11 z -NULL NULL 22 NULL -NULL NULL 44 x -NULL NULL 55 w - -query ITIT -SELECT * FROM t1 RIGHT JOIN t2 ON t1.t1_id = t2.t2_id AND t2.t2_name IS NULL ORDER BY t1_id, t1_name, t2_id, t2_name ----- -22 b 22 NULL -NULL NULL 11 z -NULL NULL 44 x -NULL NULL 55 w -NULL NULL 99 u - -query ITIT -SELECT * FROM t1 RIGHT JOIN t2 ON t1.t1_id = t2.t2_id AND t1.t1_name IS NULL ORDER BY t1_id, t1_name, t2_id, t2_name ----- -99 NULL 99 u -NULL NULL 11 z -NULL NULL 22 NULL -NULL NULL 44 x -NULL NULL 55 w - -query ITIT -SELECT * FROM t1 RIGHT JOIN t2 ON t1.t1_id = t2.t2_id AND t2.t2_name IS NULL ORDER BY t1_id, t1_name, t2_id, t2_name ----- -22 b 22 NULL -NULL NULL 11 z -NULL NULL 44 x -NULL NULL 55 w -NULL NULL 99 u - -query ITIT -SELECT * FROM t1 RIGHT JOIN t2 ON t1.t1_id = t2.t2_id AND t1.t1_id <= 22 ORDER BY t1_id, t1_name, t2_id, t2_name ----- -11 a 11 z -22 b 22 NULL -NULL NULL 44 x -NULL NULL 55 w -NULL NULL 99 u - -query ITIT -SELECT * FROM t1 RIGHT JOIN t2 ON t1.t1_id = t2.t2_id AND t2.t2_id <= 22 ORDER BY t1_id, t1_name, t2_id, t2_name ----- -11 a 11 z -22 b 22 NULL -NULL NULL 44 x -NULL NULL 55 w -NULL NULL 99 u - -# Full Outer Join - -## `WHERE` clause - -query ITIT -SELECT * FROM t1 FULL JOIN t2 ON t1.t1_id = t2.t2_id ORDER BY t1_id, t1_name, t2_id, t2_name ----- -11 a 11 z -22 b 22 NULL -33 c NULL NULL -44 d 44 x -77 e NULL NULL -88 NULL NULL NULL -99 NULL 99 u -NULL NULL 55 w - -query ITIT -SELECT * FROM t1 FULL JOIN t2 ON t1.t1_id = t2.t2_id WHERE t1.t1_name = 'a' ORDER BY t1_id, t1_name, t2_id, t2_name ----- -11 a 11 z - -query ITIT -SELECT * FROM t1 FULL JOIN t2 ON t1.t1_id = t2.t2_id WHERE t2.t2_name = 'z' ORDER BY t1_id, t1_name, t2_id, t2_name ----- -11 a 11 z - -query ITIT -SELECT * FROM t1 FULL JOIN t2 ON t1.t1_id = t2.t2_id WHERE t1.t1_name IS NULL ORDER BY t1_id, t1_name, t2_id, 
t2_name ----- -88 NULL NULL NULL -99 NULL 99 u -NULL NULL 55 w - -query ITIT -SELECT * FROM t1 FULL JOIN t2 ON t1.t1_id = t2.t2_id WHERE t2.t2_name IS NULL ORDER BY t1_id, t1_name, t2_id, t2_name ----- -22 b 22 NULL -33 c NULL NULL -77 e NULL NULL -88 NULL NULL NULL - -query ITIT -SELECT * FROM t1 FULL JOIN t2 ON t1.t1_id = t2.t2_id WHERE t1.t1_id <= 22 ORDER BY t1_id, t1_name, t2_id, t2_name ----- -11 a 11 z -22 b 22 NULL - -query ITIT -SELECT * FROM t1 FULL JOIN t2 ON t1.t1_id = t2.t2_id WHERE t2.t2_id <= 22 ORDER BY t1_id, t1_name, t2_id, t2_name ----- -11 a 11 z -22 b 22 NULL - -query ITIT -SELECT * FROM t1 FULL JOIN t2 ON t1.t1_id = t2.t2_id WHERE t1.t1_id IS NULL ORDER BY t1_id, t1_name, t2_id, t2_name ----- -NULL NULL 55 w - -query ITIT -SELECT * FROM t1 FULL JOIN t2 ON t1.t1_id = t2.t2_id WHERE t2.t2_id IS NULL ORDER BY t1_id, t1_name, t2_id, t2_name ----- -33 c NULL NULL -77 e NULL NULL -88 NULL NULL NULL - -## `ON` clause - -query ITIT -SELECT * FROM t1 FULL JOIN t2 ON t1.t1_id = t2.t2_id AND t1.t1_name = 'a' ORDER BY t1_id, t1_name, t2_id, t2_name ----- -11 a 11 z -22 b NULL NULL -33 c NULL NULL -44 d NULL NULL -77 e NULL NULL -88 NULL NULL NULL -99 NULL NULL NULL -NULL NULL 22 NULL -NULL NULL 44 x -NULL NULL 55 w -NULL NULL 99 u - -query ITIT -SELECT * FROM t1 FULL JOIN t2 ON t1.t1_id = t2.t2_id AND t2.t2_name = 'z' ORDER BY t1_id, t1_name, t2_id, t2_name ----- -11 a 11 z -22 b NULL NULL -33 c NULL NULL -44 d NULL NULL -77 e NULL NULL -88 NULL NULL NULL -99 NULL NULL NULL -NULL NULL 22 NULL -NULL NULL 44 x -NULL NULL 55 w -NULL NULL 99 u - -query ITIT -SELECT * FROM t1 FULL JOIN t2 ON t1.t1_id = t2.t2_id AND t1.t1_name IS NULL ORDER BY t1_id, t1_name, t2_id, t2_name ----- -11 a NULL NULL -22 b NULL NULL -33 c NULL NULL -44 d NULL NULL -77 e NULL NULL -88 NULL NULL NULL -99 NULL 99 u -NULL NULL 11 z -NULL NULL 22 NULL -NULL NULL 44 x -NULL NULL 55 w - -query ITIT -SELECT * FROM t1 FULL JOIN t2 ON t1.t1_id = t2.t2_id AND t2.t2_name IS NULL ORDER BY t1_id, t1_name, t2_id, t2_name ----- -11 a NULL NULL -22 b 22 NULL -33 c NULL NULL -44 d NULL NULL -77 e NULL NULL -88 NULL NULL NULL -99 NULL NULL NULL -NULL NULL 11 z -NULL NULL 44 x -NULL NULL 55 w -NULL NULL 99 u - -query ITIT -SELECT * FROM t1 FULL JOIN t2 ON t1.t1_id = t2.t2_id AND t1.t1_name IS NULL ORDER BY t1_id, t1_name, t2_id, t2_name ----- -11 a NULL NULL -22 b NULL NULL -33 c NULL NULL -44 d NULL NULL -77 e NULL NULL -88 NULL NULL NULL -99 NULL 99 u -NULL NULL 11 z -NULL NULL 22 NULL -NULL NULL 44 x -NULL NULL 55 w - -query ITIT -SELECT * FROM t1 FULL JOIN t2 ON t1.t1_id = t2.t2_id AND t2.t2_name IS NULL ORDER BY t1_id, t1_name, t2_id, t2_name ----- -11 a NULL NULL -22 b 22 NULL -33 c NULL NULL -44 d NULL NULL -77 e NULL NULL -88 NULL NULL NULL -99 NULL NULL NULL -NULL NULL 11 z -NULL NULL 44 x -NULL NULL 55 w -NULL NULL 99 u - -query ITIT -SELECT * FROM t1 FULL JOIN t2 ON t1.t1_id = t2.t2_id AND t1.t1_id <= 22 ORDER BY t1_id, t1_name, t2_id, t2_name ----- -11 a 11 z -22 b 22 NULL -33 c NULL NULL -44 d NULL NULL -77 e NULL NULL -88 NULL NULL NULL -99 NULL NULL NULL -NULL NULL 44 x -NULL NULL 55 w -NULL NULL 99 u - -query ITIT -SELECT * FROM t1 FULL JOIN t2 ON t1.t1_id = t2.t2_id AND t2.t2_id <= 22 ORDER BY t1_id, t1_name, t2_id, t2_name ----- -11 a 11 z -22 b 22 NULL -33 c NULL NULL -44 d NULL NULL -77 e NULL NULL -88 NULL NULL NULL -99 NULL NULL NULL -NULL NULL 44 x -NULL NULL 55 w -NULL NULL 99 u From 89e96b404f07900469fee31740f43edd8a410a10 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Sat, 2 Nov 2024 06:24:37 -0400 
Subject: [PATCH 32/32] Derive `Clone` for more ExecutionPlans (#13203) * Derive `Clone` for more ExecutionPlans * improve docs --- datafusion/physical-plan/src/aggregates/mod.rs | 2 +- datafusion/physical-plan/src/coalesce_batches.rs | 2 +- datafusion/physical-plan/src/coalesce_partitions.rs | 2 +- datafusion/physical-plan/src/empty.rs | 2 +- datafusion/physical-plan/src/filter.rs | 2 +- datafusion/physical-plan/src/insert.rs | 1 + datafusion/physical-plan/src/joins/cross_join.rs | 5 +++++ datafusion/physical-plan/src/joins/hash_join.rs | 5 +++++ datafusion/physical-plan/src/joins/nested_loop_join.rs | 4 ++++ datafusion/physical-plan/src/joins/sort_merge_join.rs | 2 +- datafusion/physical-plan/src/joins/symmetric_hash_join.rs | 2 +- datafusion/physical-plan/src/limit.rs | 2 +- datafusion/physical-plan/src/memory.rs | 1 + datafusion/physical-plan/src/placeholder_row.rs | 2 +- datafusion/physical-plan/src/recursive_query.rs | 2 +- datafusion/physical-plan/src/repartition/mod.rs | 2 +- datafusion/physical-plan/src/sorts/sort.rs | 2 +- datafusion/physical-plan/src/sorts/sort_preserving_merge.rs | 2 +- datafusion/physical-plan/src/streaming.rs | 1 + datafusion/physical-plan/src/union.rs | 4 ++-- datafusion/physical-plan/src/unnest.rs | 2 +- datafusion/physical-plan/src/values.rs | 2 +- .../physical-plan/src/windows/bounded_window_agg_exec.rs | 2 +- datafusion/physical-plan/src/windows/window_agg_exec.rs | 2 +- 24 files changed, 36 insertions(+), 19 deletions(-) diff --git a/datafusion/physical-plan/src/aggregates/mod.rs b/datafusion/physical-plan/src/aggregates/mod.rs index 4193cc187e10..5ffe797c5c26 100644 --- a/datafusion/physical-plan/src/aggregates/mod.rs +++ b/datafusion/physical-plan/src/aggregates/mod.rs @@ -344,7 +344,7 @@ impl From for SendableRecordBatchStream { } /// Hash aggregate execution plan -#[derive(Debug)] +#[derive(Debug, Clone)] pub struct AggregateExec { /// Aggregation mode (full, partial) mode: AggregateMode, diff --git a/datafusion/physical-plan/src/coalesce_batches.rs b/datafusion/physical-plan/src/coalesce_batches.rs index 61fb3599f013..11678e7a4696 100644 --- a/datafusion/physical-plan/src/coalesce_batches.rs +++ b/datafusion/physical-plan/src/coalesce_batches.rs @@ -48,7 +48,7 @@ use futures::stream::{Stream, StreamExt}; /// reaches the `fetch` value. /// /// See [`BatchCoalescer`] for more information -#[derive(Debug)] +#[derive(Debug, Clone)] pub struct CoalesceBatchesExec { /// The input plan input: Arc, diff --git a/datafusion/physical-plan/src/coalesce_partitions.rs b/datafusion/physical-plan/src/coalesce_partitions.rs index f9d4ec6a1a34..3da101d6092f 100644 --- a/datafusion/physical-plan/src/coalesce_partitions.rs +++ b/datafusion/physical-plan/src/coalesce_partitions.rs @@ -36,7 +36,7 @@ use datafusion_execution::TaskContext; /// Merge execution plan executes partitions in parallel and combines them into a single /// partition. No guarantees are made about the order of the resulting partition. 
-#[derive(Debug)] +#[derive(Debug, Clone)] pub struct CoalescePartitionsExec { /// Input execution plan input: Arc, diff --git a/datafusion/physical-plan/src/empty.rs b/datafusion/physical-plan/src/empty.rs index f6e0abb94fa8..192619f69f6a 100644 --- a/datafusion/physical-plan/src/empty.rs +++ b/datafusion/physical-plan/src/empty.rs @@ -35,7 +35,7 @@ use datafusion_physical_expr::EquivalenceProperties; use log::trace; /// Execution plan for empty relation with produce_one_row=false -#[derive(Debug)] +#[derive(Debug, Clone)] pub struct EmptyExec { /// The schema for the produced row schema: SchemaRef, diff --git a/datafusion/physical-plan/src/filter.rs b/datafusion/physical-plan/src/filter.rs index 30b0af19f43b..97d8159137f4 100644 --- a/datafusion/physical-plan/src/filter.rs +++ b/datafusion/physical-plan/src/filter.rs @@ -54,7 +54,7 @@ use log::trace; /// FilterExec evaluates a boolean predicate against all input batches to determine which rows to /// include in its output batches. -#[derive(Debug)] +#[derive(Debug, Clone)] pub struct FilterExec { /// The expression to filter on. This expression must evaluate to a boolean value. predicate: Arc, diff --git a/datafusion/physical-plan/src/insert.rs b/datafusion/physical-plan/src/insert.rs index 8b3ef5ae01e4..e478cecb7ffc 100644 --- a/datafusion/physical-plan/src/insert.rs +++ b/datafusion/physical-plan/src/insert.rs @@ -79,6 +79,7 @@ pub type FileSinkExec = DataSinkExec; /// Execution plan for writing record batches to a [`DataSink`] /// /// Returns a single row with the number of values written +#[derive(Clone)] pub struct DataSinkExec { /// Input plan that produces the record batches to be written. input: Arc, diff --git a/datafusion/physical-plan/src/joins/cross_join.rs b/datafusion/physical-plan/src/joins/cross_join.rs index 8f49885068fd..a67e1df47bc7 100644 --- a/datafusion/physical-plan/src/joins/cross_join.rs +++ b/datafusion/physical-plan/src/joins/cross_join.rs @@ -49,8 +49,13 @@ use futures::{ready, Stream, StreamExt, TryStreamExt}; /// Data of the left side type JoinLeftData = (RecordBatch, MemoryReservation); +#[allow(rustdoc::private_intra_doc_links)] /// executes partitions in parallel and combines them into a set of /// partitions by combining all values from the left with all values on the right +/// +/// Note that the `Clone` trait is not implemented for this struct due to the +/// `left_fut` [`OnceAsync`], which is used to coordinate the loading of the +/// left side with the processing in each output stream. #[derive(Debug)] pub struct CrossJoinExec { /// left (build) side which gets loaded in memory diff --git a/datafusion/physical-plan/src/joins/hash_join.rs b/datafusion/physical-plan/src/joins/hash_join.rs index c56c179c17eb..57d8a9ce7b35 100644 --- a/datafusion/physical-plan/src/joins/hash_join.rs +++ b/datafusion/physical-plan/src/joins/hash_join.rs @@ -136,6 +136,7 @@ impl JoinLeftData { } } +#[allow(rustdoc::private_intra_doc_links)] /// Join execution plan: Evaluates eqijoin predicates in parallel on multiple /// partitions using a hash table and an optional filter list to apply post /// join. @@ -293,6 +294,10 @@ impl JoinLeftData { /// │ "dimension" │ │ "fact" │ /// └───────────────┘ └───────────────┘ /// ``` +/// +/// Note that the `Clone` trait is not implemented for this struct due to the +/// `left_fut` [`OnceAsync`], which is used to coordinate the loading of the +/// left side with the processing in each output stream. 
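The note above captures the pattern of this whole patch: operators whose state is plain data or Arc'd children can simply derive Clone (the clone is a cheap reference-count bump), while the shared OnceAsync build-side future keeps the derive off the hash, cross, and nested-loop joins. A sketch with simplified stand-in types (none of these are the real structs):

use std::sync::Arc;

struct OnceAsyncLike; // stand-in for OnceAsync<JoinLeftData>: deliberately not Clone

#[derive(Debug, Clone)]
struct FilterLike {
    input: Arc<str>, // stand-in for Arc<dyn ExecutionPlan>
}

#[allow(dead_code)]
struct HashJoinLike {
    left_fut: OnceAsyncLike, // not Clone, so HashJoinLike cannot derive Clone
    right: Arc<str>,
}

fn main() {
    let filter = FilterLike { input: Arc::from("scan") };
    let copy = filter.clone();
    // Cloning only bumps the reference count; both plans share the same child.
    assert!(Arc::ptr_eq(&filter.input, &copy.input));

    let _join = HashJoinLike { left_fut: OnceAsyncLike, right: Arc::from("scan") };
}
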
#[derive(Debug)] pub struct HashJoinExec { /// left (build) side which gets hashed diff --git a/datafusion/physical-plan/src/joins/nested_loop_join.rs b/datafusion/physical-plan/src/joins/nested_loop_join.rs index a87743565adf..f36c2395e20f 100644 --- a/datafusion/physical-plan/src/joins/nested_loop_join.rs +++ b/datafusion/physical-plan/src/joins/nested_loop_join.rs @@ -105,6 +105,7 @@ impl JoinLeftData { } } +#[allow(rustdoc::private_intra_doc_links)] /// NestedLoopJoinExec is build-probe join operator, whose main task is to /// perform joins without any equijoin conditions in `ON` clause. /// @@ -140,6 +141,9 @@ impl JoinLeftData { /// "reports" about probe phase completion (which means that "visited" bitmap won't be /// updated anymore), and only the last thread, reporting about completion, will return output. /// +/// Note that the `Clone` trait is not implemented for this struct due to the +/// `left_fut` [`OnceAsync`], which is used to coordinate the loading of the +/// left side with the processing in each output stream. #[derive(Debug)] pub struct NestedLoopJoinExec { /// left side diff --git a/datafusion/physical-plan/src/joins/sort_merge_join.rs b/datafusion/physical-plan/src/joins/sort_merge_join.rs index 90dc407fcaed..3ad892c880f6 100644 --- a/datafusion/physical-plan/src/joins/sort_merge_join.rs +++ b/datafusion/physical-plan/src/joins/sort_merge_join.rs @@ -71,7 +71,7 @@ use crate::{ /// join execution plan executes partitions in parallel and combines them into a set of /// partitions. -#[derive(Debug)] +#[derive(Debug, Clone)] pub struct SortMergeJoinExec { /// Left sorted joining execution plan pub left: Arc, diff --git a/datafusion/physical-plan/src/joins/symmetric_hash_join.rs b/datafusion/physical-plan/src/joins/symmetric_hash_join.rs index 81c13c652513..5b6dc2cd2ae9 100644 --- a/datafusion/physical-plan/src/joins/symmetric_hash_join.rs +++ b/datafusion/physical-plan/src/joins/symmetric_hash_join.rs @@ -167,7 +167,7 @@ const HASHMAP_SHRINK_SCALE_FACTOR: usize = 4; /// making the smallest value in 'left_sorted' 1231 and any rows below (since ascending) /// than that can be dropped from the inner buffer. 
/// ``` -#[derive(Debug)] +#[derive(Debug, Clone)] pub struct SymmetricHashJoinExec { /// Left side stream pub(crate) left: Arc, diff --git a/datafusion/physical-plan/src/limit.rs b/datafusion/physical-plan/src/limit.rs index 1fe550a93056..ab1e6cb37bc8 100644 --- a/datafusion/physical-plan/src/limit.rs +++ b/datafusion/physical-plan/src/limit.rs @@ -39,7 +39,7 @@ use futures::stream::{Stream, StreamExt}; use log::trace; /// Limit execution plan -#[derive(Debug)] +#[derive(Debug, Clone)] pub struct GlobalLimitExec { /// Input execution plan input: Arc, diff --git a/datafusion/physical-plan/src/memory.rs b/datafusion/physical-plan/src/memory.rs index 56ed144845a0..c9ada345afc7 100644 --- a/datafusion/physical-plan/src/memory.rs +++ b/datafusion/physical-plan/src/memory.rs @@ -40,6 +40,7 @@ use datafusion_physical_expr::{EquivalenceProperties, LexOrdering}; use futures::Stream; /// Execution plan for reading in-memory batches of data +#[derive(Clone)] pub struct MemoryExec { /// The partitions to query partitions: Vec>, diff --git a/datafusion/physical-plan/src/placeholder_row.rs b/datafusion/physical-plan/src/placeholder_row.rs index 5d8ca7e76935..f9437f46f8a6 100644 --- a/datafusion/physical-plan/src/placeholder_row.rs +++ b/datafusion/physical-plan/src/placeholder_row.rs @@ -37,7 +37,7 @@ use datafusion_physical_expr::EquivalenceProperties; use log::trace; /// Execution plan for empty relation with produce_one_row=true -#[derive(Debug)] +#[derive(Debug, Clone)] pub struct PlaceholderRowExec { /// The schema for the produced row schema: SchemaRef, diff --git a/datafusion/physical-plan/src/recursive_query.rs b/datafusion/physical-plan/src/recursive_query.rs index e9ea9d4f5032..cbf22a4b392f 100644 --- a/datafusion/physical-plan/src/recursive_query.rs +++ b/datafusion/physical-plan/src/recursive_query.rs @@ -53,7 +53,7 @@ use futures::{ready, Stream, StreamExt}; /// Note that there won't be any limit or checks applied to detect /// an infinite recursion, so it is up to the planner to ensure that /// it won't happen. -#[derive(Debug)] +#[derive(Debug, Clone)] pub struct RecursiveQueryExec { /// Name of the query handler name: String, diff --git a/datafusion/physical-plan/src/repartition/mod.rs b/datafusion/physical-plan/src/repartition/mod.rs index 06144f98c89d..bc65b251561b 100644 --- a/datafusion/physical-plan/src/repartition/mod.rs +++ b/datafusion/physical-plan/src/repartition/mod.rs @@ -399,7 +399,7 @@ impl BatchPartitioner { /// Paper](https://w6113.github.io/files/papers/volcanoparallelism-89.pdf) /// which uses the term "Exchange" for the concept of repartitioning /// data across threads. -#[derive(Debug)] +#[derive(Debug, Clone)] pub struct RepartitionExec { /// Input execution plan input: Arc, diff --git a/datafusion/physical-plan/src/sorts/sort.rs b/datafusion/physical-plan/src/sorts/sort.rs index 32d6d3e0073c..d90d0f64ceb4 100644 --- a/datafusion/physical-plan/src/sorts/sort.rs +++ b/datafusion/physical-plan/src/sorts/sort.rs @@ -675,7 +675,7 @@ pub(crate) fn lexsort_to_indices_multi_columns( /// /// Support sorting datasets that are larger than the memory allotted /// by the memory manager, by spilling to disk. 
-#[derive(Debug)] +#[derive(Debug, Clone)] pub struct SortExec { /// Input schema pub(crate) input: Arc, diff --git a/datafusion/physical-plan/src/sorts/sort_preserving_merge.rs b/datafusion/physical-plan/src/sorts/sort_preserving_merge.rs index ae39cfe412ba..9ee0faaa0a44 100644 --- a/datafusion/physical-plan/src/sorts/sort_preserving_merge.rs +++ b/datafusion/physical-plan/src/sorts/sort_preserving_merge.rs @@ -71,7 +71,7 @@ use log::{debug, trace}; /// /// If any of the input partitions return an error, the error is propagated to /// the output and inputs are not polled again. -#[derive(Debug)] +#[derive(Debug, Clone)] pub struct SortPreservingMergeExec { /// Input plan input: Arc, diff --git a/datafusion/physical-plan/src/streaming.rs b/datafusion/physical-plan/src/streaming.rs index cdb94af1fe8a..7ccef3248069 100644 --- a/datafusion/physical-plan/src/streaming.rs +++ b/datafusion/physical-plan/src/streaming.rs @@ -55,6 +55,7 @@ pub trait PartitionStream: Debug + Send + Sync { /// /// If your source can be represented as one or more [`PartitionStream`]s, you can /// use this struct to implement [`ExecutionPlan`]. +#[derive(Clone)] pub struct StreamingTableExec { partitions: Vec>, projection: Option>, diff --git a/datafusion/physical-plan/src/union.rs b/datafusion/physical-plan/src/union.rs index 69cc63f8f65d..bd36753880eb 100644 --- a/datafusion/physical-plan/src/union.rs +++ b/datafusion/physical-plan/src/union.rs @@ -85,7 +85,7 @@ use tokio::macros::support::thread_rng_n; /// │Input 1 │ │Input 2 │ /// └─────────────────┘ └──────────────────┘ /// ``` -#[derive(Debug)] +#[derive(Debug, Clone)] pub struct UnionExec { /// Input execution plan inputs: Vec>, @@ -298,7 +298,7 @@ impl ExecutionPlan for UnionExec { /// | |-----------------+ /// +---------+ /// ``` -#[derive(Debug)] +#[derive(Debug, Clone)] pub struct InterleaveExec { /// Input execution plan inputs: Vec>, diff --git a/datafusion/physical-plan/src/unnest.rs b/datafusion/physical-plan/src/unnest.rs index 3e312b7451be..b7b9f17eb1b6 100644 --- a/datafusion/physical-plan/src/unnest.rs +++ b/datafusion/physical-plan/src/unnest.rs @@ -56,7 +56,7 @@ use log::trace; /// Thus the original RecordBatch with dimension (n x m) may have new dimension (n' x m') /// /// See [`UnnestOptions`] for more details and an example. 
-#[derive(Debug)] +#[derive(Debug, Clone)] pub struct UnnestExec { /// Input execution plan input: Arc, diff --git a/datafusion/physical-plan/src/values.rs b/datafusion/physical-plan/src/values.rs index 991146d245a7..edadf98cb10c 100644 --- a/datafusion/physical-plan/src/values.rs +++ b/datafusion/physical-plan/src/values.rs @@ -36,7 +36,7 @@ use datafusion_execution::TaskContext; use datafusion_physical_expr::EquivalenceProperties; /// Execution plan for values list based relation (produces constant rows) -#[derive(Debug)] +#[derive(Debug, Clone)] pub struct ValuesExec { /// The schema schema: SchemaRef, diff --git a/datafusion/physical-plan/src/windows/bounded_window_agg_exec.rs b/datafusion/physical-plan/src/windows/bounded_window_agg_exec.rs index 2c60be49a480..8c0331f94570 100644 --- a/datafusion/physical-plan/src/windows/bounded_window_agg_exec.rs +++ b/datafusion/physical-plan/src/windows/bounded_window_agg_exec.rs @@ -67,7 +67,7 @@ use indexmap::IndexMap; use log::debug; /// Window execution plan -#[derive(Debug)] +#[derive(Debug, Clone)] pub struct BoundedWindowAggExec { /// Input plan input: Arc, diff --git a/datafusion/physical-plan/src/windows/window_agg_exec.rs b/datafusion/physical-plan/src/windows/window_agg_exec.rs index 1318f36f269e..f71a0b9fd095 100644 --- a/datafusion/physical-plan/src/windows/window_agg_exec.rs +++ b/datafusion/physical-plan/src/windows/window_agg_exec.rs @@ -46,7 +46,7 @@ use datafusion_physical_expr_common::sort_expr::{LexOrdering, LexRequirement}; use futures::{ready, Stream, StreamExt}; /// Window execution plan -#[derive(Debug)] +#[derive(Debug, Clone)] pub struct WindowAggExec { /// Input plan pub(crate) input: Arc,