diff --git a/CHANGELOG.md b/CHANGELOG.md index ea0c339ac451..c481ce0b96a0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,6 +17,7 @@ under the License. --> -* [DataFusion CHANGELOG](./datafusion/CHANGELOG.md) +Change logs for each release can be found [here](dev/changelog). + For older versions, see [apache/arrow/CHANGELOG.md](https://github.com/apache/arrow/blob/master/CHANGELOG.md). diff --git a/Cargo.toml b/Cargo.toml index 0a7184ad2d99..21079c484ce0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -26,6 +26,7 @@ members = [ "datafusion/expr", "datafusion/expr-common", "datafusion/execution", + "datafusion/ffi", "datafusion/functions", "datafusion/functions-aggregate", "datafusion/functions-aggregate-common", @@ -59,7 +60,7 @@ license = "Apache-2.0" readme = "README.md" repository = "https://github.com/apache/datafusion" rust-version = "1.79" -version = "42.1.0" +version = "42.2.0" [workspace.dependencies] # We turn off default-features for some dependencies here so the workspaces which inherit them can @@ -92,29 +93,30 @@ bytes = "1.4" chrono = { version = "0.4.38", default-features = false } ctor = "0.2.0" dashmap = "6.0.1" -datafusion = { path = "datafusion/core", version = "42.1.0", default-features = false } -datafusion-catalog = { path = "datafusion/catalog", version = "42.1.0" } -datafusion-common = { path = "datafusion/common", version = "42.1.0", default-features = false } -datafusion-common-runtime = { path = "datafusion/common-runtime", version = "42.1.0" } -datafusion-execution = { path = "datafusion/execution", version = "42.1.0" } -datafusion-expr = { path = "datafusion/expr", version = "42.1.0" } -datafusion-expr-common = { path = "datafusion/expr-common", version = "42.1.0" } -datafusion-functions = { path = "datafusion/functions", version = "42.1.0" } -datafusion-functions-aggregate = { path = "datafusion/functions-aggregate", version = "42.1.0" } -datafusion-functions-aggregate-common = { path = "datafusion/functions-aggregate-common", version = 
"42.1.0" } -datafusion-functions-nested = { path = "datafusion/functions-nested", version = "42.1.0" } -datafusion-functions-window = { path = "datafusion/functions-window", version = "42.1.0" } -datafusion-functions-window-common = { path = "datafusion/functions-window-common", version = "42.1.0" } -datafusion-optimizer = { path = "datafusion/optimizer", version = "42.1.0", default-features = false } -datafusion-physical-expr = { path = "datafusion/physical-expr", version = "42.1.0", default-features = false } -datafusion-physical-expr-common = { path = "datafusion/physical-expr-common", version = "42.1.0", default-features = false } -datafusion-physical-optimizer = { path = "datafusion/physical-optimizer", version = "42.1.0" } -datafusion-physical-plan = { path = "datafusion/physical-plan", version = "42.1.0" } -datafusion-proto = { path = "datafusion/proto", version = "42.1.0" } -datafusion-proto-common = { path = "datafusion/proto-common", version = "42.1.0" } -datafusion-sql = { path = "datafusion/sql", version = "42.1.0" } -datafusion-sqllogictest = { path = "datafusion/sqllogictest", version = "42.1.0" } -datafusion-substrait = { path = "datafusion/substrait", version = "42.1.0" } +datafusion = { path = "datafusion/core", version = "42.2.0", default-features = false } +datafusion-catalog = { path = "datafusion/catalog", version = "42.2.0" } +datafusion-common = { path = "datafusion/common", version = "42.2.0", default-features = false } +datafusion-common-runtime = { path = "datafusion/common-runtime", version = "42.2.0" } +datafusion-execution = { path = "datafusion/execution", version = "42.2.0" } +datafusion-expr = { path = "datafusion/expr", version = "42.2.0" } +datafusion-expr-common = { path = "datafusion/expr-common", version = "42.2.0" } +datafusion-ffi = { path = "datafusion/ffi", version = "42.2.0" } +datafusion-functions = { path = "datafusion/functions", version = "42.2.0" } +datafusion-functions-aggregate = { path = 
"datafusion/functions-aggregate", version = "42.2.0" } +datafusion-functions-aggregate-common = { path = "datafusion/functions-aggregate-common", version = "42.2.0" } +datafusion-functions-nested = { path = "datafusion/functions-nested", version = "42.2.0" } +datafusion-functions-window = { path = "datafusion/functions-window", version = "42.2.0" } +datafusion-functions-window-common = { path = "datafusion/functions-window-common", version = "42.2.0" } +datafusion-optimizer = { path = "datafusion/optimizer", version = "42.2.0", default-features = false } +datafusion-physical-expr = { path = "datafusion/physical-expr", version = "42.2.0", default-features = false } +datafusion-physical-expr-common = { path = "datafusion/physical-expr-common", version = "42.2.0", default-features = false } +datafusion-physical-optimizer = { path = "datafusion/physical-optimizer", version = "42.2.0" } +datafusion-physical-plan = { path = "datafusion/physical-plan", version = "42.2.0" } +datafusion-proto = { path = "datafusion/proto", version = "42.2.0" } +datafusion-proto-common = { path = "datafusion/proto-common", version = "42.2.0" } +datafusion-sql = { path = "datafusion/sql", version = "42.2.0" } +datafusion-sqllogictest = { path = "datafusion/sqllogictest", version = "42.2.0" } +datafusion-substrait = { path = "datafusion/substrait", version = "42.2.0" } doc-comment = "0.3" env_logger = "0.11" futures = "0.3" diff --git a/benchmarks/src/bin/external_aggr.rs b/benchmarks/src/bin/external_aggr.rs index 1bc74e22ccfa..6438593a20a0 100644 --- a/benchmarks/src/bin/external_aggr.rs +++ b/benchmarks/src/bin/external_aggr.rs @@ -193,12 +193,7 @@ impl ExternalAggrConfig { ) -> Result> { let query_name = format!("Q{query_id}({})", human_readable_size(mem_limit as usize)); - let mut config = self.common.config(); - config - .options_mut() - .execution - .parquet - .schema_force_view_types = self.common.force_view_types; + let config = self.common.config(); let runtime_config = 
RuntimeConfig::new() .with_memory_pool(Arc::new(FairSpillPool::new(mem_limit as usize))) .build_arc()?; diff --git a/benchmarks/src/clickbench.rs b/benchmarks/src/clickbench.rs index 3564ae82585a..46dd4b18825b 100644 --- a/benchmarks/src/clickbench.rs +++ b/benchmarks/src/clickbench.rs @@ -119,7 +119,6 @@ impl RunOpt { let mut config = self.common.config(); { let parquet_options = &mut config.options_mut().execution.parquet; - parquet_options.schema_force_view_types = self.common.force_view_types; // The hits_partitioned dataset specifies string columns // as binary due to how it was written. Force it to strings parquet_options.binary_as_string = true; diff --git a/benchmarks/src/imdb/run.rs b/benchmarks/src/imdb/run.rs index fd4960606110..47c356990881 100644 --- a/benchmarks/src/imdb/run.rs +++ b/benchmarks/src/imdb/run.rs @@ -305,11 +305,7 @@ impl RunOpt { .config() .with_collect_statistics(!self.disable_statistics); config.options_mut().optimizer.prefer_hash_join = self.prefer_hash_join; - config - .options_mut() - .execution - .parquet - .schema_force_view_types = self.common.force_view_types; + let ctx = SessionContext::new_with_config(config); // register tables @@ -517,7 +513,6 @@ mod tests { partitions: Some(2), batch_size: 8192, debug: false, - force_view_types: false, }; let opt = RunOpt { query: Some(query), @@ -551,7 +546,6 @@ mod tests { partitions: Some(2), batch_size: 8192, debug: false, - force_view_types: false, }; let opt = RunOpt { query: Some(query), diff --git a/benchmarks/src/sort.rs b/benchmarks/src/sort.rs index 247727e1b484..b2038c432f77 100644 --- a/benchmarks/src/sort.rs +++ b/benchmarks/src/sort.rs @@ -22,7 +22,7 @@ use crate::util::{AccessLogOpt, BenchmarkRun, CommonOpt}; use arrow::util::pretty; use datafusion::common::Result; -use datafusion::physical_expr::PhysicalSortExpr; +use datafusion::physical_expr::{LexOrdering, LexOrderingRef, PhysicalSortExpr}; use datafusion::physical_plan::collect; use 
datafusion::physical_plan::sorts::sort::SortExec; use datafusion::prelude::{SessionConfig, SessionContext}; @@ -170,13 +170,13 @@ impl RunOpt { async fn exec_sort( ctx: &SessionContext, - expr: &[PhysicalSortExpr], + expr: LexOrderingRef<'_>, test_file: &TestParquetFile, debug: bool, ) -> Result<(usize, std::time::Duration)> { let start = Instant::now(); let scan = test_file.create_scan(ctx, None).await?; - let exec = Arc::new(SortExec::new(expr.to_owned(), scan)); + let exec = Arc::new(SortExec::new(LexOrdering::new(expr.to_owned()), scan)); let task_ctx = ctx.task_ctx(); let result = collect(exec, task_ctx).await?; let elapsed = start.elapsed(); diff --git a/benchmarks/src/tpch/run.rs b/benchmarks/src/tpch/run.rs index e316a66e1c60..9ff1f72d8606 100644 --- a/benchmarks/src/tpch/run.rs +++ b/benchmarks/src/tpch/run.rs @@ -120,11 +120,6 @@ impl RunOpt { .config() .with_collect_statistics(!self.disable_statistics); config.options_mut().optimizer.prefer_hash_join = self.prefer_hash_join; - config - .options_mut() - .execution - .parquet - .schema_force_view_types = self.common.force_view_types; let ctx = SessionContext::new_with_config(config); // register tables @@ -345,7 +340,6 @@ mod tests { partitions: Some(2), batch_size: 8192, debug: false, - force_view_types: false, }; let opt = RunOpt { query: Some(query), @@ -379,7 +373,6 @@ mod tests { partitions: Some(2), batch_size: 8192, debug: false, - force_view_types: false, }; let opt = RunOpt { query: Some(query), diff --git a/benchmarks/src/util/options.rs b/benchmarks/src/util/options.rs index efdb074b2461..b9398e5b522f 100644 --- a/benchmarks/src/util/options.rs +++ b/benchmarks/src/util/options.rs @@ -37,11 +37,6 @@ pub struct CommonOpt { /// Activate debug mode to see more details #[structopt(short, long)] pub debug: bool, - - /// If true, will use StringView/BinaryViewArray instead of String/BinaryArray - /// when reading ParquetFiles - #[structopt(long)] - pub force_view_types: bool, } impl CommonOpt { diff 
--git a/datafusion-cli/Cargo.lock b/datafusion-cli/Cargo.lock index 13a4fc05d2fd..f91b5faf3240 100644 --- a/datafusion-cli/Cargo.lock +++ b/datafusion-cli/Cargo.lock @@ -84,9 +84,9 @@ dependencies = [ [[package]] name = "anstream" -version = "0.6.16" +version = "0.6.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f581a3637024bb8f62027f3ab6151f502090388c1dad05b01c70fb733b33c20" +checksum = "23a1e53f0f5d86382dafe1cf314783b2044280f406e7e1506368220ad11b1338" dependencies = [ "anstyle", "anstyle-parse", @@ -99,33 +99,33 @@ dependencies = [ [[package]] name = "anstyle" -version = "1.0.8" +version = "1.0.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1bec1de6f59aedf83baf9ff929c98f2ad654b97c9510f4e70cf6f661d49fd5b1" +checksum = "8365de52b16c035ff4fcafe0092ba9390540e3e352870ac09933bebcaa2c8c56" [[package]] name = "anstyle-parse" -version = "0.2.5" +version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eb47de1e80c2b463c735db5b217a0ddc39d612e7ac9e2e96a5aed1f57616c1cb" +checksum = "3b2d16507662817a6a20a9ea92df6652ee4f94f914589377d69f3b21bc5798a9" dependencies = [ "utf8parse", ] [[package]] name = "anstyle-query" -version = "1.1.1" +version = "1.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6d36fc52c7f6c869915e99412912f22093507da8d9e942ceaf66fe4b7c14422a" +checksum = "79947af37f4177cfead1110013d678905c37501914fba0efea834c3fe9a8d60c" dependencies = [ - "windows-sys 0.52.0", + "windows-sys 0.59.0", ] [[package]] name = "anstyle-wincon" -version = "3.0.5" +version = "3.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "abbf7eaf69f3b46121caf74645dd5d3078b4b205a2513930da0033156682cd28" +checksum = "2109dbce0e72be3ec00bed26e6a7479ca384ad226efdd66db8fa2e3a38c83125" dependencies = [ "anstyle", "windows-sys 0.59.0", @@ -456,9 +456,9 @@ checksum = 
"ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26" [[package]] name = "aws-config" -version = "1.5.8" +version = "1.5.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7198e6f03240fdceba36656d8be440297b6b82270325908c7381f37d826a74f6" +checksum = "2d6448cfb224dd6a9b9ac734f58622dd0d4751f3589f3b777345745f46b2eb14" dependencies = [ "aws-credential-types", "aws-runtime", @@ -523,9 +523,9 @@ dependencies = [ [[package]] name = "aws-sdk-sso" -version = "1.46.0" +version = "1.47.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0dc2faec3205d496c7e57eff685dd944203df7ce16a4116d0281c44021788a7b" +checksum = "a8776850becacbd3a82a4737a9375ddb5c6832a51379f24443a98e61513f852c" dependencies = [ "aws-credential-types", "aws-runtime", @@ -545,9 +545,9 @@ dependencies = [ [[package]] name = "aws-sdk-ssooidc" -version = "1.47.0" +version = "1.48.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c93c241f52bc5e0476e259c953234dab7e2a35ee207ee202e86c0095ec4951dc" +checksum = "0007b5b8004547133319b6c4e87193eee2a0bcb3e4c18c75d09febe9dab7b383" dependencies = [ "aws-credential-types", "aws-runtime", @@ -567,9 +567,9 @@ dependencies = [ [[package]] name = "aws-sdk-sts" -version = "1.46.0" +version = "1.47.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b259429be94a3459fa1b00c5684faee118d74f9577cc50aebadc36e507c63b5f" +checksum = "9fffaa356e7f1c725908b75136d53207fa714e348f365671df14e95a60530ad3" dependencies = [ "aws-credential-types", "aws-runtime", @@ -590,9 +590,9 @@ dependencies = [ [[package]] name = "aws-sigv4" -version = "1.2.4" +version = "1.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cc8db6904450bafe7473c6ca9123f88cc11089e41a025408f992db4e22d3be68" +checksum = "5619742a0d8f253be760bfbb8e8e8368c69e3587e4637af5754e488a611499b1" dependencies = [ "aws-credential-types", "aws-smithy-http", @@ -938,6 +938,12 @@ 
version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fd16c4719339c4530435d38e511904438d07cce7950afa3718a84ac36c10e89e" +[[package]] +name = "cfg_aliases" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" + [[package]] name = "chrono" version = "0.4.38" @@ -1023,9 +1029,9 @@ dependencies = [ [[package]] name = "colorchoice" -version = "1.0.2" +version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d3fd119d74b830634cea2a0f58bbd0d54540518a14397557951e79340abc28c0" +checksum = "5b63caa9aa9397e2d9480a9b13673856c78d8ac123288526c37d7839f2a86990" [[package]] name = "comfy-table" @@ -1182,7 +1188,7 @@ dependencies = [ [[package]] name = "datafusion" -version = "42.1.0" +version = "42.2.0" dependencies = [ "ahash", "apache-avro", @@ -1239,7 +1245,7 @@ dependencies = [ [[package]] name = "datafusion-catalog" -version = "42.1.0" +version = "42.2.0" dependencies = [ "arrow-schema", "async-trait", @@ -1252,7 +1258,7 @@ dependencies = [ [[package]] name = "datafusion-cli" -version = "42.1.0" +version = "42.2.0" dependencies = [ "arrow", "assert_cmd", @@ -1282,7 +1288,7 @@ dependencies = [ [[package]] name = "datafusion-common" -version = "42.1.0" +version = "42.2.0" dependencies = [ "ahash", "apache-avro", @@ -1306,7 +1312,7 @@ dependencies = [ [[package]] name = "datafusion-common-runtime" -version = "42.1.0" +version = "42.2.0" dependencies = [ "log", "tokio", @@ -1314,7 +1320,7 @@ dependencies = [ [[package]] name = "datafusion-execution" -version = "42.1.0" +version = "42.2.0" dependencies = [ "arrow", "chrono", @@ -1333,7 +1339,7 @@ dependencies = [ [[package]] name = "datafusion-expr" -version = "42.1.0" +version = "42.2.0" dependencies = [ "ahash", "arrow", @@ -1355,7 +1361,7 @@ dependencies = [ [[package]] name = "datafusion-expr-common" -version = "42.1.0" +version = "42.2.0" 
dependencies = [ "arrow", "datafusion-common", @@ -1365,7 +1371,7 @@ dependencies = [ [[package]] name = "datafusion-functions" -version = "42.1.0" +version = "42.2.0" dependencies = [ "arrow", "arrow-buffer", @@ -1391,7 +1397,7 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate" -version = "42.1.0" +version = "42.2.0" dependencies = [ "ahash", "arrow", @@ -1411,7 +1417,7 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate-common" -version = "42.1.0" +version = "42.2.0" dependencies = [ "ahash", "arrow", @@ -1423,7 +1429,7 @@ dependencies = [ [[package]] name = "datafusion-functions-nested" -version = "42.1.0" +version = "42.2.0" dependencies = [ "arrow", "arrow-array", @@ -1444,7 +1450,7 @@ dependencies = [ [[package]] name = "datafusion-functions-window" -version = "42.1.0" +version = "42.2.0" dependencies = [ "datafusion-common", "datafusion-expr", @@ -1457,7 +1463,7 @@ dependencies = [ [[package]] name = "datafusion-functions-window-common" -version = "42.1.0" +version = "42.2.0" dependencies = [ "datafusion-common", "datafusion-physical-expr-common", @@ -1465,7 +1471,7 @@ dependencies = [ [[package]] name = "datafusion-optimizer" -version = "42.1.0" +version = "42.2.0" dependencies = [ "arrow", "async-trait", @@ -1483,7 +1489,7 @@ dependencies = [ [[package]] name = "datafusion-physical-expr" -version = "42.1.0" +version = "42.2.0" dependencies = [ "ahash", "arrow", @@ -1509,7 +1515,7 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-common" -version = "42.1.0" +version = "42.2.0" dependencies = [ "ahash", "arrow", @@ -1521,7 +1527,7 @@ dependencies = [ [[package]] name = "datafusion-physical-optimizer" -version = "42.1.0" +version = "42.2.0" dependencies = [ "arrow", "arrow-schema", @@ -1535,7 +1541,7 @@ dependencies = [ [[package]] name = "datafusion-physical-plan" -version = "42.1.0" +version = "42.2.0" dependencies = [ "ahash", "arrow", @@ -1568,7 +1574,7 @@ dependencies = [ [[package]] name = 
"datafusion-sql" -version = "42.1.0" +version = "42.2.0" dependencies = [ "arrow", "arrow-array", @@ -2138,7 +2144,7 @@ dependencies = [ "http 1.1.0", "hyper 1.5.0", "hyper-util", - "rustls 0.23.15", + "rustls 0.23.16", "rustls-native-certs 0.8.0", "rustls-pki-types", "tokio", @@ -2148,9 +2154,9 @@ dependencies = [ [[package]] name = "hyper-util" -version = "0.1.9" +version = "0.1.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "41296eb09f183ac68eec06e03cdbea2e759633d4067b2f6552fc2e009bcad08b" +checksum = "df2dcfbe0677734ab2f3ffa7fa7bfd4706bfdc1ef393f2ee30184aed67e631b4" dependencies = [ "bytes", "futures-channel", @@ -2373,9 +2379,9 @@ dependencies = [ [[package]] name = "libm" -version = "0.2.8" +version = "0.2.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4ec2a862134d2a7d32d7983ddcdd1c4923530833c9f2ea1a44fc5fa473989058" +checksum = "8355be11b20d696c8f18f6cc018c4e372165b1fa8126cef092399c9951984ffa" [[package]] name = "libmimalloc-sys" @@ -2508,7 +2514,7 @@ checksum = "ab2156c4fce2f8df6c499cc1c763e4394b7482525bf2a9701c9d79d215f519e4" dependencies = [ "bitflags 2.6.0", "cfg-if", - "cfg_aliases", + "cfg_aliases 0.1.1", "libc", ] @@ -2917,7 +2923,7 @@ dependencies = [ "quinn-proto", "quinn-udp", "rustc-hash", - "rustls 0.23.15", + "rustls 0.23.16", "socket2", "thiserror", "tokio", @@ -2934,7 +2940,7 @@ dependencies = [ "rand", "ring", "rustc-hash", - "rustls 0.23.15", + "rustls 0.23.16", "slab", "thiserror", "tinyvec", @@ -2943,10 +2949,11 @@ dependencies = [ [[package]] name = "quinn-udp" -version = "0.5.5" +version = "0.5.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4fe68c2e9e1a1234e218683dbdf9f9dfcb094113c5ac2b938dfcb9bab4c4140b" +checksum = "e346e016eacfff12233c243718197ca12f148c84e1e84268a896699b41c71780" dependencies = [ + "cfg_aliases 0.2.1", "libc", "once_cell", "socket2", @@ -3066,9 +3073,9 @@ checksum = 
"ba39f3699c378cd8970968dcbff9c43159ea4cfbd88d43c00b22f2ef10a435d2" [[package]] name = "reqwest" -version = "0.12.8" +version = "0.12.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f713147fbe92361e52392c73b8c9e48c04c6625bce969ef54dc901e58e042a7b" +checksum = "a77c62af46e79de0a562e1a9849205ffcb7fc1238876e9bd743357570e04046f" dependencies = [ "base64 0.22.1", "bytes", @@ -3089,7 +3096,7 @@ dependencies = [ "percent-encoding", "pin-project-lite", "quinn", - "rustls 0.23.15", + "rustls 0.23.16", "rustls-native-certs 0.8.0", "rustls-pemfile 2.2.0", "rustls-pki-types", @@ -3183,9 +3190,9 @@ dependencies = [ [[package]] name = "rustix" -version = "0.38.37" +version = "0.38.38" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8acb788b847c24f28525660c4d7758620a7210875711f79e7f663cc152726811" +checksum = "aa260229e6538e52293eeb577aabd09945a09d6d9cc0fc550ed7529056c2e32a" dependencies = [ "bitflags 2.6.0", "errno", @@ -3208,9 +3215,9 @@ dependencies = [ [[package]] name = "rustls" -version = "0.23.15" +version = "0.23.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5fbb44d7acc4e873d613422379f69f237a1b141928c02f6bc6ccfddddc2d7993" +checksum = "eee87ff5d9b36712a58574e12e9f0ea80f915a5b0ac518d322b24a465617925e" dependencies = [ "once_cell", "ring", @@ -3395,18 +3402,18 @@ checksum = "a3f0bf26fd526d2a95683cd0f87bf103b8539e2ca1ef48ce002d67aad59aa0b4" [[package]] name = "serde" -version = "1.0.213" +version = "1.0.214" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3ea7893ff5e2466df8d720bb615088341b295f849602c6956047f8f80f0e9bc1" +checksum = "f55c3193aca71c12ad7890f1785d2b73e1b9f63a0bbc353c08ef26fe03fc56b5" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.213" +version = "1.0.214" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7e85ad2009c50b58e87caa8cd6dac16bdf511bbfb7af6c33df902396aa480fa5" 
+checksum = "de523f781f095e28fa605cdce0f8307e451cc0fd14e2eb4cd2e98a355b147766" dependencies = [ "proc-macro2", "quote", @@ -3776,7 +3783,7 @@ version = "0.26.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0c7bc40d0e5a97695bb96e27995cd3a08538541b0a846f65bba7a359f36700d4" dependencies = [ - "rustls 0.23.15", + "rustls 0.23.16", "rustls-pki-types", "tokio", ] @@ -4077,9 +4084,9 @@ checksum = "65fc09f10666a9f147042251e0dda9c18f166ff7de300607007e96bdebc1068d" [[package]] name = "wasm-streams" -version = "0.4.1" +version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4e072d4e72f700fb3443d8fe94a39315df013eef1104903cdb0a2abd322bbecd" +checksum = "15053d8d85c7eccdbefef60f06769760a563c7f0a9d6902a13d35c7800b0ad65" dependencies = [ "futures-util", "js-sys", diff --git a/datafusion-cli/Cargo.toml b/datafusion-cli/Cargo.toml index 8e4352612889..049f87f08e69 100644 --- a/datafusion-cli/Cargo.toml +++ b/datafusion-cli/Cargo.toml @@ -18,7 +18,7 @@ [package] name = "datafusion-cli" description = "Command Line Client for DataFusion query engine." -version = "42.1.0" +version = "42.2.0" authors = ["Apache DataFusion "] edition = "2021" keywords = ["arrow", "datafusion", "query", "sql"] @@ -39,7 +39,7 @@ aws-sdk-sts = "1.43.0" # end pin aws-sdk crates aws-credential-types = "1.2.0" clap = { version = "4.5.16", features = ["derive", "cargo"] } -datafusion = { path = "../datafusion/core", version = "42.1.0", features = [ +datafusion = { path = "../datafusion/core", version = "42.2.0", features = [ "avro", "crypto_expressions", "datetime_expressions", diff --git a/datafusion/common/src/config.rs b/datafusion/common/src/config.rs index 15290204fbac..336513035036 100644 --- a/datafusion/common/src/config.rs +++ b/datafusion/common/src/config.rs @@ -268,6 +268,17 @@ config_namespace! 
{ /// Defaults to the number of CPU cores on the system pub planning_concurrency: usize, default = num_cpus::get() + /// When set to true, skips verifying that the schema produced by + /// planning the input of `LogicalPlan::Aggregate` exactly matches the + /// schema of the input plan. + /// + /// When set to false, if the schema does not match exactly + /// (including nullability and metadata), a planning error will be raised. + /// + /// This is used to work around bugs in the planner that are now caught by + /// the new schema verification step. + pub skip_physical_aggregate_schema_check: bool, default = false + /// Specifies the reserved memory for each spillable sort operation to /// facilitate an in-memory merge. /// @@ -388,7 +399,7 @@ config_namespace! { /// (reading) If true, parquet reader will read columns of `Utf8/Utf8Large` with `Utf8View`, /// and `Binary/BinaryLarge` with `BinaryView`. - pub schema_force_view_types: bool, default = false + pub schema_force_view_types: bool, default = true /// (reading) If true, parquet reader will read columns of /// `Binary/LargeBinary` with `Utf8`, and `BinaryView` with `Utf8View`. 
diff --git a/datafusion/common/src/functional_dependencies.rs b/datafusion/common/src/functional_dependencies.rs index ed9a68c19536..31eafc744390 100644 --- a/datafusion/common/src/functional_dependencies.rs +++ b/datafusion/common/src/functional_dependencies.rs @@ -334,7 +334,7 @@ impl FunctionalDependencies { left_func_dependencies.extend(right_func_dependencies); left_func_dependencies } - JoinType::LeftSemi | JoinType::LeftAnti => { + JoinType::LeftSemi | JoinType::LeftAnti | JoinType::LeftMark => { // These joins preserve functional dependencies of the left side: left_func_dependencies } diff --git a/datafusion/common/src/join_type.rs b/datafusion/common/src/join_type.rs index d502e7836da3..e98f34199b27 100644 --- a/datafusion/common/src/join_type.rs +++ b/datafusion/common/src/join_type.rs @@ -44,6 +44,20 @@ pub enum JoinType { LeftAnti, /// Right Anti Join RightAnti, + /// Left Mark join + /// + /// Returns one record for each record from the left input. The output contains an additional + /// column "mark" which is true if there is at least one match in the right input where the + /// join condition evaluates to true. Otherwise, the mark column is false. For more details see + /// [1]. This join type is used to decorrelate EXISTS subqueries used inside disjunctive + /// predicates. + /// + /// Note: We currently do not implement the full null semantics for the mark join described + /// in [1], which will be needed if we add ANY subqueries. In our version the mark column will + /// only be true when a match was found and false when no match was found, never null. 
+ /// + /// [1]: http://btw2017.informatik.uni-stuttgart.de/slidesandpapers/F1-10-37/paper_web.pdf + LeftMark, } impl JoinType { @@ -63,6 +77,7 @@ impl Display for JoinType { JoinType::RightSemi => "RightSemi", JoinType::LeftAnti => "LeftAnti", JoinType::RightAnti => "RightAnti", + JoinType::LeftMark => "LeftMark", }; write!(f, "{join_type}") } @@ -82,6 +97,7 @@ impl FromStr for JoinType { "RIGHTSEMI" => Ok(JoinType::RightSemi), "LEFTANTI" => Ok(JoinType::LeftAnti), "RIGHTANTI" => Ok(JoinType::RightAnti), + "LEFTMARK" => Ok(JoinType::LeftMark), _ => _not_impl_err!("The join type {s} does not exist or is not implemented"), } } @@ -101,6 +117,7 @@ impl Display for JoinSide { match self { JoinSide::Left => write!(f, "left"), JoinSide::Right => write!(f, "right"), + JoinSide::None => write!(f, "none"), } } } @@ -113,6 +130,9 @@ pub enum JoinSide { Left, /// Right side of the join Right, + /// Neither side of the join, used for Mark joins where the mark column does not belong to + /// either side of the join + None, } impl JoinSide { @@ -121,6 +141,7 @@ impl JoinSide { match self { JoinSide::Left => JoinSide::Right, JoinSide::Right => JoinSide::Left, + JoinSide::None => JoinSide::None, } } } diff --git a/datafusion/common/src/scalar/mod.rs b/datafusion/common/src/scalar/mod.rs index 7a1eaa2ad65b..5595f4f9fa70 100644 --- a/datafusion/common/src/scalar/mod.rs +++ b/datafusion/common/src/scalar/mod.rs @@ -980,6 +980,11 @@ impl ScalarValue { ScalarValue::from(val.into()) } + /// Returns a [`ScalarValue::Utf8View`] representing `val` + pub fn new_utf8view(val: impl Into) -> Self { + ScalarValue::Utf8View(Some(val.into())) + } + /// Returns a [`ScalarValue::IntervalYearMonth`] representing /// `years` years and `months` months pub fn new_interval_ym(years: i32, months: i32) -> Self { diff --git a/datafusion/common/src/stats.rs b/datafusion/common/src/stats.rs index e669c674f78a..1aa42705e7f8 100644 --- a/datafusion/common/src/stats.rs +++ b/datafusion/common/src/stats.rs @@ 
-258,6 +258,26 @@ impl Statistics { self } + /// Project the statistics to the given column indices. + /// + /// For example, if we had statistics for columns `{"a", "b", "c"}`, + /// projecting to `vec![2, 1]` would return statistics for columns `{"c", + /// "b"}`. + pub fn project(mut self, projection: Option<&Vec>) -> Self { + let Some(projection) = projection else { + return self; + }; + + // todo: it would be nice to avoid cloning column statistics if + // possible (e.g. if the projection did not contain duplicates) + self.column_statistics = projection + .iter() + .map(|&i| self.column_statistics[i].clone()) + .collect(); + + self + } + /// Calculates the statistics after `fetch` and `skip` operations apply. /// Here, `self` denotes per-partition statistics. Use the `n_partitions` /// parameter to compute global statistics in a multi-partition setting. diff --git a/datafusion/common/src/types/native.rs b/datafusion/common/src/types/native.rs index 132620d681b3..4646c7dedd85 100644 --- a/datafusion/common/src/types/native.rs +++ b/datafusion/common/src/types/native.rs @@ -24,6 +24,7 @@ use arrow::compute::can_cast_types; use arrow_schema::{ DataType, Field, FieldRef, Fields, IntervalUnit, TimeUnit, UnionFields, }; +use std::sync::Arc; use std::sync::{Arc, OnceLock}; /// Representation of a type that DataFusion can handle natively. 
It is a subset @@ -456,3 +457,8 @@ impl NativeType { // pub fn logical_float64() -> LogicalTypeRef { // Arc::clone(LOGICAL_FLOAT64.get_or_init(|| Arc::new(NativeType::Float64))) // } +// impl From<&DataType> for NativeType { +// fn from(value: &DataType) -> Self { +// value.clone().into() +// } +// } diff --git a/datafusion/core/benches/physical_plan.rs b/datafusion/core/benches/physical_plan.rs index 3ad71be1f447..349c2e438195 100644 --- a/datafusion/core/benches/physical_plan.rs +++ b/datafusion/core/benches/physical_plan.rs @@ -36,6 +36,7 @@ use datafusion::physical_plan::{ memory::MemoryExec, }; use datafusion::prelude::SessionContext; +use datafusion_physical_expr_common::sort_expr::LexOrdering; // Initialise the operator using the provided record batches and the sort key // as inputs. All record batches must have the same schema. @@ -52,7 +53,7 @@ fn sort_preserving_merge_operator( expr: col(name, &schema).unwrap(), options: Default::default(), }) - .collect::>(); + .collect::(); let exec = MemoryExec::try_new( &batches.into_iter().map(|rb| vec![rb]).collect::>(), diff --git a/datafusion/core/benches/sort.rs b/datafusion/core/benches/sort.rs index 99a74b61b3e0..14e80ce364e3 100644 --- a/datafusion/core/benches/sort.rs +++ b/datafusion/core/benches/sort.rs @@ -89,6 +89,7 @@ use datafusion_physical_expr::{expressions::col, PhysicalSortExpr}; /// Benchmarks for SortPreservingMerge stream use criterion::{criterion_group, criterion_main, Criterion}; +use datafusion_physical_expr_common::sort_expr::LexOrdering; use futures::StreamExt; use rand::rngs::StdRng; use rand::{Rng, SeedableRng}; @@ -257,7 +258,7 @@ impl BenchCase { } /// Make sort exprs for each column in `schema` -fn make_sort_exprs(schema: &Schema) -> Vec { +fn make_sort_exprs(schema: &Schema) -> LexOrdering { schema .fields() .iter() diff --git a/datafusion/core/src/dataframe/mod.rs b/datafusion/core/src/dataframe/mod.rs index e5d352a63c7a..2c71cb80d755 100644 --- a/datafusion/core/src/dataframe/mod.rs 
+++ b/datafusion/core/src/dataframe/mod.rs @@ -3864,6 +3864,7 @@ mod tests { JoinType::RightSemi, JoinType::LeftAnti, JoinType::RightAnti, + JoinType::LeftMark, ]; let default_partition_count = SessionConfig::new().target_partitions(); @@ -3881,7 +3882,10 @@ mod tests { let join_schema = physical_plan.schema(); match join_type { - JoinType::Left | JoinType::LeftSemi | JoinType::LeftAnti => { + JoinType::Left + | JoinType::LeftSemi + | JoinType::LeftAnti + | JoinType::LeftMark => { let left_exprs: Vec> = vec![ Arc::new(Column::new_with_schema("c1", &join_schema)?), Arc::new(Column::new_with_schema("c2", &join_schema)?), diff --git a/datafusion/core/src/datasource/file_format/csv.rs b/datafusion/core/src/datasource/file_format/csv.rs index 3cb5ae4f85ca..2aaef2cda1c8 100644 --- a/datafusion/core/src/datasource/file_format/csv.rs +++ b/datafusion/core/src/datasource/file_format/csv.rs @@ -325,7 +325,13 @@ impl FileFormat for CsvFormat { let stream = self.read_to_delimited_chunks(store, object).await; let (schema, records_read) = self .infer_schema_from_stream(state, records_to_read, stream) - .await?; + .await + .map_err(|err| { + DataFusionError::Context( + format!("Error when processing CSV file {}", &object.location), + Box::new(err), + ) + })?; records_to_read -= records_read; schemas.push(schema); if records_to_read == 0 { @@ -433,11 +439,13 @@ impl CsvFormat { let mut total_records_read = 0; let mut column_names = vec![]; let mut column_type_possibilities = vec![]; - let mut first_chunk = true; + let mut record_number = -1; pin_mut!(stream); while let Some(chunk) = stream.next().await.transpose()? 
{ + record_number += 1; + let first_chunk = record_number == 0; let mut format = arrow::csv::reader::Format::default() .with_header( first_chunk @@ -471,14 +479,14 @@ impl CsvFormat { (field.name().clone(), possibilities) }) .unzip(); - first_chunk = false; } else { if fields.len() != column_type_possibilities.len() { return exec_err!( "Encountered unequal lengths between records on CSV file whilst inferring schema. \ - Expected {} records, found {} records", + Expected {} fields, found {} fields at record {}", column_type_possibilities.len(), - fields.len() + fields.len(), + record_number + 1 ); } diff --git a/datafusion/core/src/datasource/file_format/parquet.rs b/datafusion/core/src/datasource/file_format/parquet.rs index 9153e71a5c26..b3f54e0773fd 100644 --- a/datafusion/core/src/datasource/file_format/parquet.rs +++ b/datafusion/core/src/datasource/file_format/parquet.rs @@ -2274,47 +2274,7 @@ mod tests { #[tokio::test] async fn parquet_sink_write() -> Result<()> { - let field_a = Field::new("a", DataType::Utf8, false); - let field_b = Field::new("b", DataType::Utf8, false); - let schema = Arc::new(Schema::new(vec![field_a, field_b])); - let object_store_url = ObjectStoreUrl::local_filesystem(); - - let file_sink_config = FileSinkConfig { - object_store_url: object_store_url.clone(), - file_groups: vec![PartitionedFile::new("/tmp".to_string(), 1)], - table_paths: vec![ListingTableUrl::parse("file:///")?], - output_schema: schema.clone(), - table_partition_cols: vec![], - insert_op: InsertOp::Overwrite, - keep_partition_by_columns: false, - }; - let parquet_sink = Arc::new(ParquetSink::new( - file_sink_config, - TableParquetOptions { - key_value_metadata: std::collections::HashMap::from([ - ("my-data".to_string(), Some("stuff".to_string())), - ("my-data-bool-key".to_string(), None), - ]), - ..Default::default() - }, - )); - - // create data - let col_a: ArrayRef = Arc::new(StringArray::from(vec!["foo", "bar"])); - let col_b: ArrayRef = 
Arc::new(StringArray::from(vec!["baz", "baz"])); - let batch = RecordBatch::try_from_iter(vec![("a", col_a), ("b", col_b)]).unwrap(); - - // write stream - parquet_sink - .write_all( - Box::pin(RecordBatchStreamAdapter::new( - schema, - futures::stream::iter(vec![Ok(batch)]), - )), - &build_ctx(object_store_url.as_ref()), - ) - .await - .unwrap(); + let parquet_sink = create_written_parquet_sink("file:///").await?; // assert written let mut written = parquet_sink.written(); @@ -2366,6 +2326,140 @@ mod tests { Ok(()) } + #[tokio::test] + async fn parquet_sink_write_with_extension() -> Result<()> { + let filename = "test_file.custom_ext"; + let file_path = format!("file:///path/to/{}", filename); + let parquet_sink = create_written_parquet_sink(file_path.as_str()).await?; + + // assert written + let mut written = parquet_sink.written(); + let written = written.drain(); + assert_eq!( + written.len(), + 1, + "expected a single parquet file to be written, instead found {}", + written.len() + ); + + let (path, ..) = written.take(1).next().unwrap(); + + let path_parts = path.parts().collect::>(); + assert_eq!( + path_parts.len(), + 3, + "Expected 3 path parts, instead found {}", + path_parts.len() + ); + assert_eq!(path_parts.last().unwrap().as_ref(), filename); + + Ok(()) + } + + #[tokio::test] + async fn parquet_sink_write_with_directory_name() -> Result<()> { + let file_path = "file:///path/to"; + let parquet_sink = create_written_parquet_sink(file_path).await?; + + // assert written + let mut written = parquet_sink.written(); + let written = written.drain(); + assert_eq!( + written.len(), + 1, + "expected a single parquet file to be written, instead found {}", + written.len() + ); + + let (path, ..) 
= written.take(1).next().unwrap(); + + let path_parts = path.parts().collect::>(); + assert_eq!( + path_parts.len(), + 3, + "Expected 3 path parts, instead found {}", + path_parts.len() + ); + assert!(path_parts.last().unwrap().as_ref().ends_with(".parquet")); + + Ok(()) + } + + #[tokio::test] + async fn parquet_sink_write_with_folder_ending() -> Result<()> { + let file_path = "file:///path/to/"; + let parquet_sink = create_written_parquet_sink(file_path).await?; + + // assert written + let mut written = parquet_sink.written(); + let written = written.drain(); + assert_eq!( + written.len(), + 1, + "expected a single parquet file to be written, instead found {}", + written.len() + ); + + let (path, ..) = written.take(1).next().unwrap(); + + let path_parts = path.parts().collect::>(); + assert_eq!( + path_parts.len(), + 3, + "Expected 3 path parts, instead found {}", + path_parts.len() + ); + assert!(path_parts.last().unwrap().as_ref().ends_with(".parquet")); + + Ok(()) + } + + async fn create_written_parquet_sink(table_path: &str) -> Result> { + let field_a = Field::new("a", DataType::Utf8, false); + let field_b = Field::new("b", DataType::Utf8, false); + let schema = Arc::new(Schema::new(vec![field_a, field_b])); + let object_store_url = ObjectStoreUrl::local_filesystem(); + + let file_sink_config = FileSinkConfig { + object_store_url: object_store_url.clone(), + file_groups: vec![PartitionedFile::new("/tmp".to_string(), 1)], + table_paths: vec![ListingTableUrl::parse(table_path)?], + output_schema: schema.clone(), + table_partition_cols: vec![], + insert_op: InsertOp::Overwrite, + keep_partition_by_columns: false, + }; + let parquet_sink = Arc::new(ParquetSink::new( + file_sink_config, + TableParquetOptions { + key_value_metadata: std::collections::HashMap::from([ + ("my-data".to_string(), Some("stuff".to_string())), + ("my-data-bool-key".to_string(), None), + ]), + ..Default::default() + }, + )); + + // create data + let col_a: ArrayRef = 
Arc::new(StringArray::from(vec!["foo", "bar"])); + let col_b: ArrayRef = Arc::new(StringArray::from(vec!["baz", "baz"])); + let batch = RecordBatch::try_from_iter(vec![("a", col_a), ("b", col_b)]).unwrap(); + + // write stream + parquet_sink + .write_all( + Box::pin(RecordBatchStreamAdapter::new( + schema, + futures::stream::iter(vec![Ok(batch)]), + )), + &build_ctx(object_store_url.as_ref()), + ) + .await + .unwrap(); + + Ok(parquet_sink) + } + #[tokio::test] async fn parquet_sink_write_partitions() -> Result<()> { let field_a = Field::new("a", DataType::Utf8, false); diff --git a/datafusion/core/src/datasource/file_format/write/demux.rs b/datafusion/core/src/datasource/file_format/write/demux.rs index 1746ffef8282..b03676d53271 100644 --- a/datafusion/core/src/datasource/file_format/write/demux.rs +++ b/datafusion/core/src/datasource/file_format/write/demux.rs @@ -59,8 +59,9 @@ type DemuxedStreamReceiver = UnboundedReceiver<(Path, RecordBatchReceiver)>; /// which should be contained within the same output file. The outer channel /// is used to send a dynamic number of inner channels, representing a dynamic /// number of total output files. The caller is also responsible to monitor -/// the demux task for errors and abort accordingly. The single_file_output parameter -/// overrides all other settings to force only a single file to be written. +/// the demux task for errors and abort accordingly. A path with an extension will +/// force only a single file to be written with the extension from the path. Otherwise +/// the default extension will be used and the output will be split into multiple files. 
/// partition_by parameter will additionally split the input based on the unique /// values of a specific column ``` /// ┌───────────┐ ┌────────────┐ ┌─────────────┐ @@ -79,12 +80,13 @@ pub(crate) fn start_demuxer_task( context: &Arc, partition_by: Option>, base_output_path: ListingTableUrl, - file_extension: String, + default_extension: String, keep_partition_by_columns: bool, ) -> (SpawnedTask>, DemuxedStreamReceiver) { let (tx, rx) = mpsc::unbounded_channel(); let context = context.clone(); - let single_file_output = !base_output_path.is_collection(); + let single_file_output = + !base_output_path.is_collection() && base_output_path.file_extension().is_some(); let task = match partition_by { Some(parts) => { // There could be an arbitrarily large number of parallel hive style partitions being written to, so we cannot @@ -96,7 +98,7 @@ pub(crate) fn start_demuxer_task( context, parts, base_output_path, - file_extension, + default_extension, keep_partition_by_columns, ) .await @@ -108,7 +110,7 @@ pub(crate) fn start_demuxer_task( input, context, base_output_path, - file_extension, + default_extension, single_file_output, ) .await diff --git a/datafusion/core/src/datasource/listing/table.rs b/datafusion/core/src/datasource/listing/table.rs index ea2e098ef14e..15125fe5a090 100644 --- a/datafusion/core/src/datasource/listing/table.rs +++ b/datafusion/core/src/datasource/listing/table.rs @@ -1283,13 +1283,16 @@ mod tests { // ok with one column ( vec![vec![col("string_col").sort(true, false)]], - Ok(vec![vec![PhysicalSortExpr { - expr: physical_col("string_col", &schema).unwrap(), - options: SortOptions { - descending: false, - nulls_first: false, - }, - }]]) + Ok(vec![LexOrdering { + inner: vec![PhysicalSortExpr { + expr: physical_col("string_col", &schema).unwrap(), + options: SortOptions { + descending: false, + nulls_first: false, + }, + }], + } + ]) ), // ok with two columns, different options ( @@ -1297,15 +1300,17 @@ mod tests { col("string_col").sort(true, 
false), col("int_col").sort(false, true), ]], - Ok(vec![vec![ - PhysicalSortExpr::new_default(physical_col("string_col", &schema).unwrap()) - .asc() - .nulls_last(), - - PhysicalSortExpr::new_default(physical_col("int_col", &schema).unwrap()) - .desc() - .nulls_first() - ]]) + Ok(vec![LexOrdering { + inner: vec![ + PhysicalSortExpr::new_default(physical_col("string_col", &schema).unwrap()) + .asc() + .nulls_last(), + PhysicalSortExpr::new_default(physical_col("int_col", &schema).unwrap()) + .desc() + .nulls_first() + ], + } + ]) ), ]; diff --git a/datafusion/core/src/datasource/listing/url.rs b/datafusion/core/src/datasource/listing/url.rs index 1701707fdb72..e627cacfbfc7 100644 --- a/datafusion/core/src/datasource/listing/url.rs +++ b/datafusion/core/src/datasource/listing/url.rs @@ -190,6 +190,19 @@ impl ListingTableUrl { self.url.path().ends_with(DELIMITER) } + /// Returns the file extension of the last path segment if it exists + pub fn file_extension(&self) -> Option<&str> { + if let Some(segments) = self.url.path_segments() { + if let Some(last_segment) = segments.last() { + if last_segment.contains(".") && !last_segment.ends_with(".") { + return last_segment.split('.').last(); + } + } + } + + None + } + /// Strips the prefix of this [`ListingTableUrl`] from the provided path, returning /// an iterator of the remaining path segments pub(crate) fn strip_prefix<'a, 'b: 'a>( @@ -493,4 +506,54 @@ mod tests { "path not ends with / - fragment ends with / - not collection", ); } + + #[test] + fn test_file_extension() { + fn test(input: &str, expected: Option<&str>, message: &str) { + let url = ListingTableUrl::parse(input).unwrap(); + assert_eq!(url.file_extension(), expected, "{message}"); + } + + test("https://a.b.c/path/", None, "path ends with / - not a file"); + test( + "https://a.b.c/path/?a=b", + None, + "path ends with / - with query args - not a file", + ); + test( + "https://a.b.c/path?a=b/", + None, + "path not ends with / - query ends with / but no file 
extension", + ); + test( + "https://a.b.c/path/#a=b", + None, + "path ends with / - with fragment - not a file", + ); + test( + "https://a.b.c/path#a=b/", + None, + "path not ends with / - fragment ends with / but no file extension", + ); + test( + "file///some/path/", + None, + "file path ends with / - not a file", + ); + test( + "file///some/path/file", + None, + "file path does not end with - no extension", + ); + test( + "file///some/path/file.", + None, + "file path ends with . - no value after .", + ); + test( + "file///some/path/file.ext", + Some("ext"), + "file path ends with .ext - extension is ext", + ); + } } diff --git a/datafusion/core/src/datasource/mod.rs b/datafusion/core/src/datasource/mod.rs index 0ed53418fe32..ad369b75e130 100644 --- a/datafusion/core/src/datasource/mod.rs +++ b/datafusion/core/src/datasource/mod.rs @@ -62,7 +62,7 @@ fn create_ordering( for exprs in sort_order { // Construct PhysicalSortExpr objects from Expr objects: - let mut sort_exprs = vec![]; + let mut sort_exprs = LexOrdering::default(); for sort in exprs { match &sort.expr { Expr::Column(col) => match expressions::col(&col.name, schema) { diff --git a/datafusion/core/src/datasource/physical_plan/file_scan_config.rs b/datafusion/core/src/datasource/physical_plan/file_scan_config.rs index 96c0e452e29e..74ab0126a557 100644 --- a/datafusion/core/src/datasource/physical_plan/file_scan_config.rs +++ b/datafusion/core/src/datasource/physical_plan/file_scan_config.rs @@ -34,7 +34,8 @@ use arrow_array::{ArrayRef, DictionaryArray, RecordBatch, RecordBatchOptions}; use arrow_schema::{DataType, Field, Schema, SchemaRef}; use datafusion_common::stats::Precision; use datafusion_common::{exec_err, ColumnStatistics, DataFusionError, Statistics}; -use datafusion_physical_expr::{LexOrdering, PhysicalSortExpr}; +use datafusion_physical_expr::LexOrdering; +use datafusion_physical_expr_common::sort_expr::LexOrderingRef; use log::warn; @@ -307,7 +308,7 @@ impl FileScanConfig { pub fn 
split_groups_by_statistics( table_schema: &SchemaRef, file_groups: &[Vec], - sort_order: &[PhysicalSortExpr], + sort_order: LexOrderingRef, ) -> Result>> { let flattened_files = file_groups.iter().flatten().collect::>(); // First Fit: diff --git a/datafusion/core/src/datasource/physical_plan/mod.rs b/datafusion/core/src/datasource/physical_plan/mod.rs index 407a3b74f79f..9971e87282a5 100644 --- a/datafusion/core/src/datasource/physical_plan/mod.rs +++ b/datafusion/core/src/datasource/physical_plan/mod.rs @@ -65,6 +65,7 @@ use crate::{ use arrow::datatypes::{DataType, SchemaRef}; use datafusion_physical_expr::expressions::Column; use datafusion_physical_expr::PhysicalSortExpr; +use datafusion_physical_expr_common::sort_expr::LexOrdering; use futures::StreamExt; use log::debug; @@ -328,11 +329,11 @@ impl From for FileMeta { fn get_projected_output_ordering( base_config: &FileScanConfig, projected_schema: &SchemaRef, -) -> Vec> { +) -> Vec { let mut all_orderings = vec![]; for output_ordering in &base_config.output_ordering { - let mut new_ordering = vec![]; - for PhysicalSortExpr { expr, options } in output_ordering { + let mut new_ordering = LexOrdering::default(); + for PhysicalSortExpr { expr, options } in output_ordering.iter() { if let Some(col) = expr.as_any().downcast_ref::() { let name = col.name(); if let Some((idx, _)) = projected_schema.column_with_name(name) { diff --git a/datafusion/core/src/datasource/physical_plan/statistics.rs b/datafusion/core/src/datasource/physical_plan/statistics.rs index 3ca3ba89f4d9..6af153a731b0 100644 --- a/datafusion/core/src/datasource/physical_plan/statistics.rs +++ b/datafusion/core/src/datasource/physical_plan/statistics.rs @@ -26,6 +26,8 @@ use std::sync::Arc; +use crate::datasource::listing::PartitionedFile; + use arrow::{ compute::SortColumn, row::{Row, Rows}, @@ -34,8 +36,7 @@ use arrow_array::RecordBatch; use arrow_schema::SchemaRef; use datafusion_common::{DataFusionError, Result}; use 
datafusion_physical_expr::{expressions::Column, PhysicalSortExpr}; - -use crate::datasource::listing::PartitionedFile; +use datafusion_physical_expr_common::sort_expr::{LexOrdering, LexOrderingRef}; /// A normalized representation of file min/max statistics that allows for efficient sorting & comparison. /// The min/max values are ordered by [`Self::sort_order`]. @@ -43,13 +44,13 @@ use crate::datasource::listing::PartitionedFile; pub(crate) struct MinMaxStatistics { min_by_sort_order: Rows, max_by_sort_order: Rows, - sort_order: Vec, + sort_order: LexOrdering, } impl MinMaxStatistics { /// Sort order used to sort the statistics #[allow(unused)] - pub fn sort_order(&self) -> &[PhysicalSortExpr] { + pub fn sort_order(&self) -> LexOrderingRef { &self.sort_order } @@ -65,8 +66,8 @@ impl MinMaxStatistics { } pub fn new_from_files<'a>( - projected_sort_order: &[PhysicalSortExpr], // Sort order with respect to projected schema - projected_schema: &SchemaRef, // Projected schema + projected_sort_order: LexOrderingRef, // Sort order with respect to projected schema + projected_schema: &SchemaRef, // Projected schema projection: Option<&[usize]>, // Indices of projection in full table schema (None = all columns) files: impl IntoIterator, ) -> Result { @@ -166,7 +167,7 @@ impl MinMaxStatistics { } pub fn new( - sort_order: &[PhysicalSortExpr], + sort_order: LexOrderingRef, schema: &SchemaRef, min_values: RecordBatch, max_values: RecordBatch, @@ -256,7 +257,7 @@ impl MinMaxStatistics { Ok(Self { min_by_sort_order: min.map_err(|e| e.context("build min rows"))?, max_by_sort_order: max.map_err(|e| e.context("build max rows"))?, - sort_order: sort_order.to_vec(), + sort_order: LexOrdering::from_ref(sort_order), }) } @@ -277,7 +278,7 @@ impl MinMaxStatistics { } fn sort_columns_from_physical_sort_exprs( - sort_order: &[PhysicalSortExpr], + sort_order: LexOrderingRef, ) -> Option> { sort_order .iter() diff --git a/datafusion/core/src/physical_optimizer/enforce_distribution.rs 
b/datafusion/core/src/physical_optimizer/enforce_distribution.rs index aa4bcb683749..6cd902db7244 100644 --- a/datafusion/core/src/physical_optimizer/enforce_distribution.rs +++ b/datafusion/core/src/physical_optimizer/enforce_distribution.rs @@ -52,11 +52,12 @@ use datafusion_physical_expr::utils::map_columns_before_projection; use datafusion_physical_expr::{ physical_exprs_equal, EquivalenceProperties, PhysicalExpr, PhysicalExprRef, }; +use datafusion_physical_expr_common::sort_expr::LexOrdering; +use datafusion_physical_optimizer::output_requirements::OutputRequirementExec; +use datafusion_physical_optimizer::PhysicalOptimizerRule; use datafusion_physical_plan::windows::{get_best_fitting_window, BoundedWindowAggExec}; use datafusion_physical_plan::ExecutionPlanProperties; -use datafusion_physical_optimizer::output_requirements::OutputRequirementExec; -use datafusion_physical_optimizer::PhysicalOptimizerRule; use itertools::izip; /// The `EnforceDistribution` rule ensures that distribution requirements are @@ -328,7 +329,8 @@ fn adjust_input_keys_ordering( JoinType::Left | JoinType::LeftSemi | JoinType::LeftAnti - | JoinType::Full => vec![], + | JoinType::Full + | JoinType::LeftMark => vec![], }; } PartitionMode::Auto => { @@ -934,7 +936,7 @@ fn add_spm_on_top(input: DistributionContext) -> DistributionContext { let new_plan = if should_preserve_ordering { Arc::new(SortPreservingMergeExec::new( - input.plan.output_ordering().unwrap_or(&[]).to_vec(), + LexOrdering::from_ref(input.plan.output_ordering().unwrap_or(&[])), input.plan.clone(), )) as _ } else { @@ -1434,7 +1436,7 @@ pub(crate) mod tests { impl SortRequiredExec { fn new_with_requirement( input: Arc, - requirement: Vec, + requirement: LexOrdering, ) -> Self { let cache = Self::compute_properties(&input); Self { @@ -1460,11 +1462,7 @@ pub(crate) mod tests { _t: DisplayFormatType, f: &mut std::fmt::Formatter, ) -> std::fmt::Result { - write!( - f, - "SortRequiredExec: [{}]", - 
PhysicalSortExpr::format_list(&self.expr) - ) + write!(f, "SortRequiredExec: [{}]", self.expr) } } @@ -1494,7 +1492,9 @@ pub(crate) mod tests { if self.expr.is_empty() { vec![None] } else { - vec![Some(PhysicalSortRequirement::from_sort_exprs(&self.expr))] + vec![Some(PhysicalSortRequirement::from_sort_exprs( + self.expr.iter(), + ))] } } @@ -1539,7 +1539,7 @@ pub(crate) mod tests { /// create a single parquet file that is sorted pub(crate) fn parquet_exec_with_sort( - output_ordering: Vec>, + output_ordering: Vec, ) -> Arc { ParquetExec::builder( FileScanConfig::new(ObjectStoreUrl::parse("test:///").unwrap(), schema()) @@ -1555,7 +1555,7 @@ pub(crate) mod tests { /// Created a sorted parquet exec with multiple files fn parquet_exec_multiple_sorted( - output_ordering: Vec>, + output_ordering: Vec, ) -> Arc { ParquetExec::builder( FileScanConfig::new(ObjectStoreUrl::parse("test:///").unwrap(), schema()) @@ -1572,7 +1572,7 @@ pub(crate) mod tests { csv_exec_with_sort(vec![]) } - fn csv_exec_with_sort(output_ordering: Vec>) -> Arc { + fn csv_exec_with_sort(output_ordering: Vec) -> Arc { Arc::new( CsvExec::builder( FileScanConfig::new(ObjectStoreUrl::parse("test:///").unwrap(), schema()) @@ -1595,9 +1595,7 @@ pub(crate) mod tests { } // Created a sorted parquet exec with multiple files - fn csv_exec_multiple_sorted( - output_ordering: Vec>, - ) -> Arc { + fn csv_exec_multiple_sorted(output_ordering: Vec) -> Arc { Arc::new( CsvExec::builder( FileScanConfig::new(ObjectStoreUrl::parse("test:///").unwrap(), schema()) @@ -1727,7 +1725,7 @@ pub(crate) mod tests { } fn sort_exec( - sort_exprs: Vec, + sort_exprs: LexOrdering, input: Arc, preserve_partitioning: bool, ) -> Arc { @@ -1737,7 +1735,7 @@ pub(crate) mod tests { } fn sort_preserving_merge_exec( - sort_exprs: Vec, + sort_exprs: LexOrdering, input: Arc, ) -> Arc { Arc::new(SortPreservingMergeExec::new(sort_exprs, input)) @@ -1959,6 +1957,7 @@ pub(crate) mod tests { JoinType::Full, JoinType::LeftSemi, JoinType::LeftAnti, 
+ JoinType::LeftMark, JoinType::RightSemi, JoinType::RightAnti, ]; @@ -1981,7 +1980,8 @@ pub(crate) mod tests { | JoinType::Right | JoinType::Full | JoinType::LeftSemi - | JoinType::LeftAnti => { + | JoinType::LeftAnti + | JoinType::LeftMark => { // Join on (a == c) let top_join_on = vec![( Arc::new(Column::new_with_schema("a", &join.schema()).unwrap()) @@ -1999,7 +1999,7 @@ pub(crate) mod tests { let expected = match join_type { // Should include 3 RepartitionExecs - JoinType::Inner | JoinType::Left | JoinType::LeftSemi | JoinType::LeftAnti => vec![ + JoinType::Inner | JoinType::Left | JoinType::LeftSemi | JoinType::LeftAnti | JoinType::LeftMark => vec![ top_join_plan.as_str(), join_plan.as_str(), "RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=10", @@ -2098,7 +2098,7 @@ pub(crate) mod tests { assert_optimized!(expected, top_join.clone(), true); assert_optimized!(expected, top_join, false); } - JoinType::LeftSemi | JoinType::LeftAnti => {} + JoinType::LeftSemi | JoinType::LeftAnti | JoinType::LeftMark => {} } } @@ -3073,7 +3073,7 @@ pub(crate) mod tests { // Only two RepartitionExecs added let expected = &[ "SortMergeJoin: join_type=Inner, on=[(b3@1, b2@1), (a3@0, a2@0)]", - "SortExec: expr=[b3@1 ASC,a3@0 ASC], preserve_partitioning=[true]", + "SortExec: expr=[b3@1 ASC, a3@0 ASC], preserve_partitioning=[true]", "ProjectionExec: expr=[a1@0 as a3, b1@1 as b3]", "ProjectionExec: expr=[a1@1 as a1, b1@0 as b1]", "AggregateExec: mode=FinalPartitioned, gby=[b1@0 as b1, a1@1 as a1], aggr=[]", @@ -3081,7 +3081,7 @@ pub(crate) mod tests { "AggregateExec: mode=Partial, gby=[b@1 as b1, a@0 as a1], aggr=[]", "RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", "ParquetExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e]", - "SortExec: expr=[b2@1 ASC,a2@0 ASC], preserve_partitioning=[true]", + "SortExec: expr=[b2@1 ASC, a2@0 ASC], preserve_partitioning=[true]", "ProjectionExec: expr=[a@1 as a2, b@0 as b2]", "AggregateExec: 
mode=FinalPartitioned, gby=[b@0 as b, a@1 as a], aggr=[]", "RepartitionExec: partitioning=Hash([b@0, a@1], 10), input_partitions=10", @@ -3093,9 +3093,9 @@ pub(crate) mod tests { let expected_first_sort_enforcement = &[ "SortMergeJoin: join_type=Inner, on=[(b3@1, b2@1), (a3@0, a2@0)]", - "RepartitionExec: partitioning=Hash([b3@1, a3@0], 10), input_partitions=10, preserve_order=true, sort_exprs=b3@1 ASC,a3@0 ASC", + "RepartitionExec: partitioning=Hash([b3@1, a3@0], 10), input_partitions=10, preserve_order=true, sort_exprs=b3@1 ASC, a3@0 ASC", "RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - "SortExec: expr=[b3@1 ASC,a3@0 ASC], preserve_partitioning=[false]", + "SortExec: expr=[b3@1 ASC, a3@0 ASC], preserve_partitioning=[false]", "CoalescePartitionsExec", "ProjectionExec: expr=[a1@0 as a3, b1@1 as b3]", "ProjectionExec: expr=[a1@1 as a1, b1@0 as b1]", @@ -3104,9 +3104,9 @@ pub(crate) mod tests { "AggregateExec: mode=Partial, gby=[b@1 as b1, a@0 as a1], aggr=[]", "RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", "ParquetExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e]", - "RepartitionExec: partitioning=Hash([b2@1, a2@0], 10), input_partitions=10, preserve_order=true, sort_exprs=b2@1 ASC,a2@0 ASC", + "RepartitionExec: partitioning=Hash([b2@1, a2@0], 10), input_partitions=10, preserve_order=true, sort_exprs=b2@1 ASC, a2@0 ASC", "RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - "SortExec: expr=[b2@1 ASC,a2@0 ASC], preserve_partitioning=[false]", + "SortExec: expr=[b2@1 ASC, a2@0 ASC], preserve_partitioning=[false]", "CoalescePartitionsExec", "ProjectionExec: expr=[a@1 as a2, b@0 as b2]", "AggregateExec: mode=FinalPartitioned, gby=[b@0 as b, a@1 as a], aggr=[]", @@ -3124,10 +3124,10 @@ pub(crate) mod tests { fn merge_does_not_need_sort() -> Result<()> { // see https://github.com/apache/datafusion/issues/4331 let schema = schema(); - let sort_key = vec![PhysicalSortExpr { + let sort_key 
= LexOrdering::new(vec![PhysicalSortExpr { expr: col("a", &schema).unwrap(), options: SortOptions::default(), - }]; + }]); // Scan some sorted parquet files let exec = parquet_exec_multiple_sorted(vec![sort_key.clone()]); @@ -3326,10 +3326,10 @@ pub(crate) mod tests { #[test] fn repartition_sorted_limit() -> Result<()> { let schema = schema(); - let sort_key = vec![PhysicalSortExpr { + let sort_key = LexOrdering::new(vec![PhysicalSortExpr { expr: col("c", &schema).unwrap(), options: SortOptions::default(), - }]; + }]); let plan = limit_exec(sort_exec(sort_key, parquet_exec(), false)); let expected = &[ @@ -3348,10 +3348,10 @@ pub(crate) mod tests { #[test] fn repartition_sorted_limit_with_filter() -> Result<()> { let schema = schema(); - let sort_key = vec![PhysicalSortExpr { + let sort_key = LexOrdering::new(vec![PhysicalSortExpr { expr: col("c", &schema).unwrap(), options: SortOptions::default(), - }]; + }]); let plan = sort_required_exec_with_req( filter_exec(sort_exec(sort_key.clone(), parquet_exec(), false)), sort_key, @@ -3427,10 +3427,10 @@ pub(crate) mod tests { fn repartition_through_sort_preserving_merge() -> Result<()> { // sort preserving merge with non-sorted input let schema = schema(); - let sort_key = vec![PhysicalSortExpr { + let sort_key = LexOrdering::new(vec![PhysicalSortExpr { expr: col("c", &schema).unwrap(), options: SortOptions::default(), - }]; + }]); let plan = sort_preserving_merge_exec(sort_key, parquet_exec()); // need resort as the data was not sorted correctly @@ -3448,10 +3448,10 @@ pub(crate) mod tests { fn repartition_ignores_sort_preserving_merge() -> Result<()> { // sort preserving merge already sorted input, let schema = schema(); - let sort_key = vec![PhysicalSortExpr { + let sort_key = LexOrdering::new(vec![PhysicalSortExpr { expr: col("c", &schema).unwrap(), options: SortOptions::default(), - }]; + }]); let plan = sort_preserving_merge_exec( sort_key.clone(), parquet_exec_multiple_sorted(vec![sort_key]), @@ -3480,10 +3480,10 
@@ pub(crate) mod tests { fn repartition_ignores_sort_preserving_merge_with_union() -> Result<()> { // 2 sorted parquet files unioned (partitions are concatenated, sort is preserved) let schema = schema(); - let sort_key = vec![PhysicalSortExpr { + let sort_key = LexOrdering::new(vec![PhysicalSortExpr { expr: col("c", &schema).unwrap(), options: SortOptions::default(), - }]; + }]); let input = union_exec(vec![parquet_exec_with_sort(vec![sort_key.clone()]); 2]); let plan = sort_preserving_merge_exec(sort_key, input); @@ -3514,10 +3514,10 @@ pub(crate) mod tests { // SortRequired // Parquet(sorted) let schema = schema(); - let sort_key = vec![PhysicalSortExpr { + let sort_key = LexOrdering::new(vec![PhysicalSortExpr { expr: col("d", &schema).unwrap(), options: SortOptions::default(), - }]; + }]); let plan = sort_required_exec_with_req( filter_exec(parquet_exec_with_sort(vec![sort_key.clone()])), sort_key, @@ -3549,10 +3549,10 @@ pub(crate) mod tests { // Parquet(unsorted) let schema = schema(); - let sort_key = vec![PhysicalSortExpr { + let sort_key = LexOrdering::new(vec![PhysicalSortExpr { expr: col("c", &schema).unwrap(), options: SortOptions::default(), - }]; + }]); let input1 = sort_required_exec_with_req( parquet_exec_with_sort(vec![sort_key.clone()]), sort_key, @@ -3591,10 +3591,10 @@ pub(crate) mod tests { )]; // non sorted input let proj = Arc::new(ProjectionExec::try_new(proj_exprs, parquet_exec())?); - let sort_key = vec![PhysicalSortExpr { + let sort_key = LexOrdering::new(vec![PhysicalSortExpr { expr: col("sum", &proj.schema()).unwrap(), options: SortOptions::default(), - }]; + }]); let plan = sort_preserving_merge_exec(sort_key, proj); let expected = &[ @@ -3624,10 +3624,10 @@ pub(crate) mod tests { #[test] fn repartition_ignores_transitively_with_projection() -> Result<()> { let schema = schema(); - let sort_key = vec![PhysicalSortExpr { + let sort_key = LexOrdering::new(vec![PhysicalSortExpr { expr: col("c", &schema).unwrap(), options: 
SortOptions::default(), - }]; + }]); let alias = vec![ ("a".to_string(), "a".to_string()), ("b".to_string(), "b".to_string()), @@ -3657,10 +3657,10 @@ pub(crate) mod tests { #[test] fn repartition_transitively_past_sort_with_projection() -> Result<()> { let schema = schema(); - let sort_key = vec![PhysicalSortExpr { + let sort_key = LexOrdering::new(vec![PhysicalSortExpr { expr: col("c", &schema).unwrap(), options: SortOptions::default(), - }]; + }]); let alias = vec![ ("a".to_string(), "a".to_string()), ("b".to_string(), "b".to_string()), @@ -3690,10 +3690,10 @@ pub(crate) mod tests { #[test] fn repartition_transitively_past_sort_with_filter() -> Result<()> { let schema = schema(); - let sort_key = vec![PhysicalSortExpr { + let sort_key = LexOrdering::new(vec![PhysicalSortExpr { expr: col("a", &schema).unwrap(), options: SortOptions::default(), - }]; + }]); let plan = sort_exec(sort_key, filter_exec(parquet_exec()), false); let expected = &[ @@ -3724,10 +3724,10 @@ pub(crate) mod tests { #[cfg(feature = "parquet")] fn repartition_transitively_past_sort_with_projection_and_filter() -> Result<()> { let schema = schema(); - let sort_key = vec![PhysicalSortExpr { + let sort_key = LexOrdering::new(vec![PhysicalSortExpr { expr: col("a", &schema).unwrap(), options: SortOptions::default(), - }]; + }]); let plan = sort_exec( sort_key, projection_exec_with_alias( @@ -3794,10 +3794,10 @@ pub(crate) mod tests { #[test] fn parallelization_multiple_files() -> Result<()> { let schema = schema(); - let sort_key = vec![PhysicalSortExpr { + let sort_key = LexOrdering::new(vec![PhysicalSortExpr { expr: col("a", &schema).unwrap(), options: SortOptions::default(), - }]; + }]); let plan = filter_exec(parquet_exec_multiple_sorted(vec![sort_key.clone()])); let plan = sort_required_exec_with_req(plan, sort_key); @@ -3958,10 +3958,10 @@ pub(crate) mod tests { #[test] fn parallelization_sorted_limit() -> Result<()> { let schema = schema(); - let sort_key = vec![PhysicalSortExpr { + let 
sort_key = LexOrdering::new(vec![PhysicalSortExpr { expr: col("c", &schema).unwrap(), options: SortOptions::default(), - }]; + }]); let plan_parquet = limit_exec(sort_exec(sort_key.clone(), parquet_exec(), false)); let plan_csv = limit_exec(sort_exec(sort_key, csv_exec(), false)); @@ -3990,10 +3990,10 @@ pub(crate) mod tests { #[test] fn parallelization_limit_with_filter() -> Result<()> { let schema = schema(); - let sort_key = vec![PhysicalSortExpr { + let sort_key = LexOrdering::new(vec![PhysicalSortExpr { expr: col("c", &schema).unwrap(), options: SortOptions::default(), - }]; + }]); let plan_parquet = limit_exec(filter_exec(sort_exec( sort_key.clone(), parquet_exec(), @@ -4113,10 +4113,10 @@ pub(crate) mod tests { #[test] fn parallelization_prior_to_sort_preserving_merge() -> Result<()> { let schema = schema(); - let sort_key = vec![PhysicalSortExpr { + let sort_key = LexOrdering::new(vec![PhysicalSortExpr { expr: col("c", &schema).unwrap(), options: SortOptions::default(), - }]; + }]); // sort preserving merge already sorted input, let plan_parquet = sort_preserving_merge_exec( sort_key.clone(), @@ -4143,10 +4143,10 @@ pub(crate) mod tests { #[test] fn parallelization_sort_preserving_merge_with_union() -> Result<()> { let schema = schema(); - let sort_key = vec![PhysicalSortExpr { + let sort_key = LexOrdering::new(vec![PhysicalSortExpr { expr: col("c", &schema).unwrap(), options: SortOptions::default(), - }]; + }]); // 2 sorted parquet files unioned (partitions are concatenated, sort is preserved) let input_parquet = union_exec(vec![parquet_exec_with_sort(vec![sort_key.clone()]); 2]); @@ -4177,10 +4177,10 @@ pub(crate) mod tests { #[test] fn parallelization_does_not_benefit() -> Result<()> { let schema = schema(); - let sort_key = vec![PhysicalSortExpr { + let sort_key = LexOrdering::new(vec![PhysicalSortExpr { expr: col("c", &schema).unwrap(), options: SortOptions::default(), - }]; + }]); // SortRequired // Parquet(sorted) let plan_parquet = 
sort_required_exec_with_req( @@ -4211,10 +4211,10 @@ pub(crate) mod tests { fn parallelization_ignores_transitively_with_projection_parquet() -> Result<()> { // sorted input let schema = schema(); - let sort_key = vec![PhysicalSortExpr { + let sort_key = LexOrdering::new(vec![PhysicalSortExpr { expr: col("c", &schema).unwrap(), options: SortOptions::default(), - }]; + }]); //Projection(a as a2, b as b2) let alias_pairs: Vec<(String, String)> = vec![ @@ -4225,10 +4225,10 @@ pub(crate) mod tests { parquet_exec_with_sort(vec![sort_key]), alias_pairs, ); - let sort_key_after_projection = vec![PhysicalSortExpr { + let sort_key_after_projection = LexOrdering::new(vec![PhysicalSortExpr { expr: col("c2", &proj_parquet.schema()).unwrap(), options: SortOptions::default(), - }]; + }]); let plan_parquet = sort_preserving_merge_exec(sort_key_after_projection, proj_parquet); let expected = &[ @@ -4252,10 +4252,10 @@ pub(crate) mod tests { fn parallelization_ignores_transitively_with_projection_csv() -> Result<()> { // sorted input let schema = schema(); - let sort_key = vec![PhysicalSortExpr { + let sort_key = LexOrdering::new(vec![PhysicalSortExpr { expr: col("c", &schema).unwrap(), options: SortOptions::default(), - }]; + }]); //Projection(a as a2, b as b2) let alias_pairs: Vec<(String, String)> = vec![ @@ -4265,10 +4265,10 @@ pub(crate) mod tests { let proj_csv = projection_exec_with_alias(csv_exec_with_sort(vec![sort_key]), alias_pairs); - let sort_key_after_projection = vec![PhysicalSortExpr { + let sort_key_after_projection = LexOrdering::new(vec![PhysicalSortExpr { expr: col("c2", &proj_csv.schema()).unwrap(), options: SortOptions::default(), - }]; + }]); let plan_csv = sort_preserving_merge_exec(sort_key_after_projection, proj_csv); let expected = &[ "SortPreservingMergeExec: [c2@1 ASC]", @@ -4315,10 +4315,10 @@ pub(crate) mod tests { #[test] fn remove_unnecessary_spm_after_filter() -> Result<()> { let schema = schema(); - let sort_key = vec![PhysicalSortExpr { + let 
sort_key = LexOrdering::new(vec![PhysicalSortExpr { expr: col("c", &schema).unwrap(), options: SortOptions::default(), - }]; + }]); let input = parquet_exec_multiple_sorted(vec![sort_key.clone()]); let physical_plan = sort_preserving_merge_exec(sort_key, filter_exec(input)); @@ -4340,10 +4340,10 @@ pub(crate) mod tests { #[test] fn preserve_ordering_through_repartition() -> Result<()> { let schema = schema(); - let sort_key = vec![PhysicalSortExpr { + let sort_key = LexOrdering::new(vec![PhysicalSortExpr { expr: col("d", &schema).unwrap(), options: SortOptions::default(), - }]; + }]); let input = parquet_exec_multiple_sorted(vec![sort_key.clone()]); let physical_plan = sort_preserving_merge_exec(sort_key, filter_exec(input)); @@ -4363,10 +4363,10 @@ pub(crate) mod tests { #[test] fn do_not_preserve_ordering_through_repartition() -> Result<()> { let schema = schema(); - let sort_key = vec![PhysicalSortExpr { + let sort_key = LexOrdering::new(vec![PhysicalSortExpr { expr: col("a", &schema).unwrap(), options: SortOptions::default(), - }]; + }]); let input = parquet_exec_multiple_sorted(vec![sort_key.clone()]); let physical_plan = sort_preserving_merge_exec(sort_key, filter_exec(input)); @@ -4395,10 +4395,10 @@ pub(crate) mod tests { #[test] fn no_need_for_sort_after_filter() -> Result<()> { let schema = schema(); - let sort_key = vec![PhysicalSortExpr { + let sort_key = LexOrdering::new(vec![PhysicalSortExpr { expr: col("c", &schema).unwrap(), options: SortOptions::default(), - }]; + }]); let input = parquet_exec_multiple_sorted(vec![sort_key.clone()]); let physical_plan = sort_preserving_merge_exec(sort_key, filter_exec(input)); @@ -4419,16 +4419,16 @@ pub(crate) mod tests { #[test] fn do_not_preserve_ordering_through_repartition2() -> Result<()> { let schema = schema(); - let sort_key = vec![PhysicalSortExpr { + let sort_key = LexOrdering::new(vec![PhysicalSortExpr { expr: col("c", &schema).unwrap(), options: SortOptions::default(), - }]; + }]); let input = 
parquet_exec_multiple_sorted(vec![sort_key]); - let sort_req = vec![PhysicalSortExpr { + let sort_req = LexOrdering::new(vec![PhysicalSortExpr { expr: col("a", &schema).unwrap(), options: SortOptions::default(), - }]; + }]); let physical_plan = sort_preserving_merge_exec(sort_req, filter_exec(input)); let expected = &[ @@ -4457,10 +4457,10 @@ pub(crate) mod tests { #[test] fn do_not_preserve_ordering_through_repartition3() -> Result<()> { let schema = schema(); - let sort_key = vec![PhysicalSortExpr { + let sort_key = LexOrdering::new(vec![PhysicalSortExpr { expr: col("c", &schema).unwrap(), options: SortOptions::default(), - }]; + }]); let input = parquet_exec_multiple_sorted(vec![sort_key]); let physical_plan = filter_exec(input); @@ -4478,10 +4478,10 @@ pub(crate) mod tests { #[test] fn do_not_put_sort_when_input_is_invalid() -> Result<()> { let schema = schema(); - let sort_key = vec![PhysicalSortExpr { + let sort_key = LexOrdering::new(vec![PhysicalSortExpr { expr: col("a", &schema).unwrap(), options: SortOptions::default(), - }]; + }]); let input = parquet_exec(); let physical_plan = sort_required_exec_with_req(filter_exec(input), sort_key); let expected = &[ @@ -4515,10 +4515,10 @@ pub(crate) mod tests { #[test] fn put_sort_when_input_is_valid() -> Result<()> { let schema = schema(); - let sort_key = vec![PhysicalSortExpr { + let sort_key = LexOrdering::new(vec![PhysicalSortExpr { expr: col("a", &schema).unwrap(), options: SortOptions::default(), - }]; + }]); let input = parquet_exec_multiple_sorted(vec![sort_key.clone()]); let physical_plan = sort_required_exec_with_req(filter_exec(input), sort_key); @@ -4552,10 +4552,10 @@ pub(crate) mod tests { #[test] fn do_not_add_unnecessary_hash() -> Result<()> { let schema = schema(); - let sort_key = vec![PhysicalSortExpr { + let sort_key = LexOrdering::new(vec![PhysicalSortExpr { expr: col("c", &schema).unwrap(), options: SortOptions::default(), - }]; + }]); let alias = vec![("a".to_string(), "a".to_string())]; let 
input = parquet_exec_with_sort(vec![sort_key]); let physical_plan = aggregate_exec_with_alias(input, alias); @@ -4575,10 +4575,10 @@ pub(crate) mod tests { #[test] fn do_not_add_unnecessary_hash2() -> Result<()> { let schema = schema(); - let sort_key = vec![PhysicalSortExpr { + let sort_key = LexOrdering::new(vec![PhysicalSortExpr { expr: col("c", &schema).unwrap(), options: SortOptions::default(), - }]; + }]); let alias = vec![("a".to_string(), "a".to_string())]; let input = parquet_exec_multiple_sorted(vec![sort_key]); let aggregate = aggregate_exec_with_alias(input, alias.clone()); diff --git a/datafusion/core/src/physical_optimizer/enforce_sorting.rs b/datafusion/core/src/physical_optimizer/enforce_sorting.rs index aa28f9d6b6aa..7b111cddc6fd 100644 --- a/datafusion/core/src/physical_optimizer/enforce_sorting.rs +++ b/datafusion/core/src/physical_optimizer/enforce_sorting.rs @@ -61,13 +61,14 @@ use crate::physical_plan::{Distribution, ExecutionPlan, InputOrderMode}; use datafusion_common::plan_err; use datafusion_common::tree_node::{Transformed, TransformedResult, TreeNode}; -use datafusion_physical_expr::{Partitioning, PhysicalSortExpr, PhysicalSortRequirement}; +use datafusion_physical_expr::{Partitioning, PhysicalSortRequirement}; +use datafusion_physical_expr_common::sort_expr::{LexOrdering, LexOrderingRef}; +use datafusion_physical_optimizer::PhysicalOptimizerRule; use datafusion_physical_plan::limit::{GlobalLimitExec, LocalLimitExec}; use datafusion_physical_plan::repartition::RepartitionExec; use datafusion_physical_plan::sorts::partial_sort::PartialSortExec; use datafusion_physical_plan::ExecutionPlanProperties; -use datafusion_physical_optimizer::PhysicalOptimizerRule; use itertools::izip; /// This rule inspects [`SortExec`]'s in the given physical plan and removes the @@ -231,7 +232,7 @@ fn replace_with_partial_sort( if common_prefix_length > 0 { return Ok(Arc::new( PartialSortExec::new( - sort_plan.expr().to_vec(), + 
LexOrdering::new(sort_plan.expr().to_vec()), sort_plan.input().clone(), common_prefix_length, ) @@ -275,7 +276,7 @@ fn parallelize_sorts( // Take the initial sort expressions and requirements let (sort_exprs, fetch) = get_sort_exprs(&requirements.plan)?; let sort_reqs = PhysicalSortRequirement::from_sort_exprs(sort_exprs); - let sort_exprs = sort_exprs.to_vec(); + let sort_exprs = LexOrdering::new(sort_exprs.to_vec()); // If there is a connection between a `CoalescePartitionsExec` and a // global sort that satisfy the requirements (i.e. intermediate @@ -390,15 +391,14 @@ fn analyze_immediate_sort_removal( if let Some(sort_exec) = node.plan.as_any().downcast_ref::() { let sort_input = sort_exec.input(); // If this sort is unnecessary, we should remove it: - if sort_input - .equivalence_properties() - .ordering_satisfy(sort_exec.properties().output_ordering().unwrap_or(&[])) - { + if sort_input.equivalence_properties().ordering_satisfy( + sort_exec.properties().output_ordering().unwrap_or_default(), + ) { node.plan = if !sort_exec.preserve_partitioning() && sort_input.output_partitioning().partition_count() > 1 { // Replace the sort with a sort-preserving merge: - let expr = sort_exec.expr().to_vec(); + let expr = LexOrdering::new(sort_exec.expr().to_vec()); Arc::new(SortPreservingMergeExec::new(expr, sort_input.clone())) as _ } else { // Remove the sort: @@ -619,7 +619,10 @@ fn remove_corresponding_sort_from_sub_plan( // `SortPreservingMergeExec` instead of a `CoalescePartitionsExec`. let plan = node.plan.clone(); let plan = if let Some(ordering) = plan.output_ordering() { - Arc::new(SortPreservingMergeExec::new(ordering.to_vec(), plan)) as _ + Arc::new(SortPreservingMergeExec::new( + LexOrdering::new(ordering.to_vec()), + plan, + )) as _ } else { Arc::new(CoalescePartitionsExec::new(plan)) as _ }; @@ -629,10 +632,10 @@ fn remove_corresponding_sort_from_sub_plan( Ok(node) } -/// Converts an [ExecutionPlan] trait object to a [PhysicalSortExpr] slice when possible. 
+/// Converts an [ExecutionPlan] trait object to a [LexOrderingRef] when possible. fn get_sort_exprs( sort_any: &Arc, -) -> Result<(&[PhysicalSortExpr], Option)> { +) -> Result<(LexOrderingRef, Option)> { if let Some(sort_exec) = sort_any.as_any().downcast_ref::() { Ok((sort_exec.expr(), sort_exec.fetch())) } else if let Some(spm) = sort_any.as_any().downcast_ref::() @@ -645,7 +648,6 @@ fn get_sort_exprs( #[cfg(test)] mod tests { - use super::*; use crate::physical_optimizer::enforce_distribution::EnforceDistribution; use crate::physical_optimizer::test_utils::{ @@ -936,8 +938,8 @@ mod tests { "RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=10", " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", " SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]", - " SortPreservingMergeExec: [nullable_col@0 ASC,non_nullable_col@1 ASC]", - " SortExec: expr=[nullable_col@0 ASC,non_nullable_col@1 ASC], preserve_partitioning=[false]", + " SortPreservingMergeExec: [nullable_col@0 ASC, non_nullable_col@1 ASC]", + " SortExec: expr=[nullable_col@0 ASC, non_nullable_col@1 ASC], preserve_partitioning=[false]", " SortPreservingMergeExec: [non_nullable_col@1 ASC]", " SortExec: expr=[non_nullable_col@1 ASC], preserve_partitioning=[false]", " MemoryExec: partitions=1, partition_sizes=[0]", @@ -961,10 +963,10 @@ mod tests { let sort = sort_exec(sort_exprs.clone(), source); let spm = sort_preserving_merge_exec(sort_exprs, sort); - let sort_exprs = vec![ + let sort_exprs = LexOrdering::new(vec![ sort_expr("nullable_col", &schema), sort_expr("non_nullable_col", &schema), - ]; + ]); let repartition_exec = repartition_exec(spm); let sort2 = Arc::new( SortExec::new(sort_exprs.clone(), repartition_exec) @@ -979,8 +981,8 @@ mod tests { // it with a `CoalescePartitionsExec` instead of directly removing it. 
let expected_input = [ "AggregateExec: mode=Final, gby=[], aggr=[]", - " SortPreservingMergeExec: [nullable_col@0 ASC,non_nullable_col@1 ASC]", - " SortExec: expr=[nullable_col@0 ASC,non_nullable_col@1 ASC], preserve_partitioning=[true]", + " SortPreservingMergeExec: [nullable_col@0 ASC, non_nullable_col@1 ASC]", + " SortExec: expr=[nullable_col@0 ASC, non_nullable_col@1 ASC], preserve_partitioning=[true]", " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", " SortPreservingMergeExec: [non_nullable_col@1 ASC]", " SortExec: expr=[non_nullable_col@1 ASC], preserve_partitioning=[false]", @@ -1006,7 +1008,7 @@ mod tests { let source2 = repartition_exec(memory_exec(&schema)); let union = union_exec(vec![source1, source2]); - let sort_exprs = vec![sort_expr("non_nullable_col", &schema)]; + let sort_exprs = LexOrdering::new(vec![sort_expr("non_nullable_col", &schema)]); // let sort = sort_exec(sort_exprs.clone(), union); let sort = Arc::new( SortExec::new(sort_exprs.clone(), union).with_preserve_partitioning(true), @@ -1029,7 +1031,7 @@ mod tests { // When removing a `SortPreservingMergeExec`, make sure that partitioning // requirements are not violated. In some cases, we may need to replace // it with a `CoalescePartitionsExec` instead of directly removing it. 
- let expected_input = ["SortExec: expr=[nullable_col@0 ASC,non_nullable_col@1 ASC], preserve_partitioning=[false]", + let expected_input = ["SortExec: expr=[nullable_col@0 ASC, non_nullable_col@1 ASC], preserve_partitioning=[false]", " FilterExec: NOT non_nullable_col@1", " SortPreservingMergeExec: [non_nullable_col@1 ASC]", " SortExec: expr=[non_nullable_col@1 ASC], preserve_partitioning=[true]", @@ -1039,8 +1041,8 @@ mod tests { " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", " MemoryExec: partitions=1, partition_sizes=[0]"]; - let expected_optimized = ["SortPreservingMergeExec: [nullable_col@0 ASC,non_nullable_col@1 ASC]", - " SortExec: expr=[nullable_col@0 ASC,non_nullable_col@1 ASC], preserve_partitioning=[true]", + let expected_optimized = ["SortPreservingMergeExec: [nullable_col@0 ASC, non_nullable_col@1 ASC]", + " SortExec: expr=[nullable_col@0 ASC, non_nullable_col@1 ASC], preserve_partitioning=[true]", " FilterExec: NOT non_nullable_col@1", " UnionExec", " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", @@ -1085,8 +1087,11 @@ mod tests { let schema = create_test_schema()?; let source = memory_exec(&schema); let input = Arc::new( - SortExec::new(vec![sort_expr("non_nullable_col", &schema)], source) - .with_fetch(Some(2)), + SortExec::new( + LexOrdering::new(vec![sort_expr("non_nullable_col", &schema)]), + source, + ) + .with_fetch(Some(2)), ); let physical_plan = sort_exec( vec![ @@ -1097,12 +1102,12 @@ mod tests { ); let expected_input = [ - "SortExec: expr=[non_nullable_col@1 ASC,nullable_col@0 ASC], preserve_partitioning=[false]", + "SortExec: expr=[non_nullable_col@1 ASC, nullable_col@0 ASC], preserve_partitioning=[false]", " SortExec: TopK(fetch=2), expr=[non_nullable_col@1 ASC], preserve_partitioning=[false]", " MemoryExec: partitions=1, partition_sizes=[0]", ]; let expected_optimized = [ - "SortExec: TopK(fetch=2), expr=[non_nullable_col@1 ASC,nullable_col@0 ASC], preserve_partitioning=[false]", + 
"SortExec: TopK(fetch=2), expr=[non_nullable_col@1 ASC, nullable_col@0 ASC], preserve_partitioning=[false]", " MemoryExec: partitions=1, partition_sizes=[0]", ]; assert_optimized!(expected_input, expected_optimized, physical_plan, true); @@ -1115,26 +1120,29 @@ mod tests { let schema = create_test_schema()?; let source = memory_exec(&schema); let input = Arc::new(SortExec::new( - vec![ + LexOrdering::new(vec![ sort_expr("non_nullable_col", &schema), sort_expr("nullable_col", &schema), - ], + ]), source, )); let physical_plan = Arc::new( - SortExec::new(vec![sort_expr("non_nullable_col", &schema)], input) - .with_fetch(Some(2)), + SortExec::new( + LexOrdering::new(vec![sort_expr("non_nullable_col", &schema)]), + input, + ) + .with_fetch(Some(2)), ) as Arc; let expected_input = [ "SortExec: TopK(fetch=2), expr=[non_nullable_col@1 ASC], preserve_partitioning=[false]", - " SortExec: expr=[non_nullable_col@1 ASC,nullable_col@0 ASC], preserve_partitioning=[false]", + " SortExec: expr=[non_nullable_col@1 ASC, nullable_col@0 ASC], preserve_partitioning=[false]", " MemoryExec: partitions=1, partition_sizes=[0]", ]; let expected_optimized = [ "GlobalLimitExec: skip=0, fetch=2", - " SortExec: expr=[non_nullable_col@1 ASC,nullable_col@0 ASC], preserve_partitioning=[false]", + " SortExec: expr=[non_nullable_col@1 ASC, nullable_col@0 ASC], preserve_partitioning=[false]", " MemoryExec: partitions=1, partition_sizes=[0]", ]; assert_optimized!(expected_input, expected_optimized, physical_plan, true); @@ -1147,7 +1155,7 @@ mod tests { let schema = create_test_schema()?; let source = memory_exec(&schema); let input = Arc::new(SortExec::new( - vec![sort_expr("non_nullable_col", &schema)], + LexOrdering::new(vec![sort_expr("non_nullable_col", &schema)]), source, )); let limit = Arc::new(LocalLimitExec::new(input, 2)); @@ -1160,14 +1168,14 @@ mod tests { ); let expected_input = [ - "SortExec: expr=[non_nullable_col@1 ASC,nullable_col@0 ASC], preserve_partitioning=[false]", + "SortExec: 
expr=[non_nullable_col@1 ASC, nullable_col@0 ASC], preserve_partitioning=[false]", " LocalLimitExec: fetch=2", " SortExec: expr=[non_nullable_col@1 ASC], preserve_partitioning=[false]", " MemoryExec: partitions=1, partition_sizes=[0]", ]; let expected_optimized = [ "LocalLimitExec: fetch=2", - " SortExec: TopK(fetch=2), expr=[non_nullable_col@1 ASC,nullable_col@0 ASC], preserve_partitioning=[false]", + " SortExec: TopK(fetch=2), expr=[non_nullable_col@1 ASC, nullable_col@0 ASC], preserve_partitioning=[false]", " MemoryExec: partitions=1, partition_sizes=[0]", ]; assert_optimized!(expected_input, expected_optimized, physical_plan, true); @@ -1181,7 +1189,7 @@ mod tests { let source = memory_exec(&schema); // let input = sort_exec(vec![sort_expr("non_nullable_col", &schema)], source); let input = Arc::new(SortExec::new( - vec![sort_expr("non_nullable_col", &schema)], + LexOrdering::new(vec![sort_expr("non_nullable_col", &schema)]), source, )); let limit = Arc::new(GlobalLimitExec::new(input, 0, Some(5))) as _; @@ -1253,24 +1261,24 @@ mod tests { let repartition = repartition_exec(union); let physical_plan = sort_preserving_merge_exec(sort_exprs, repartition); - let expected_input = ["SortPreservingMergeExec: [nullable_col@0 ASC,non_nullable_col@1 ASC]", + let expected_input = ["SortPreservingMergeExec: [nullable_col@0 ASC, non_nullable_col@1 ASC]", " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=2", " UnionExec", " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC]", " GlobalLimitExec: skip=0, fetch=100", " LocalLimitExec: fetch=100", - " SortExec: expr=[nullable_col@0 ASC,non_nullable_col@1 ASC], preserve_partitioning=[false]", + " SortExec: expr=[nullable_col@0 ASC, non_nullable_col@1 ASC], preserve_partitioning=[false]", " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]"]; // We should keep the bottom `SortExec`. 
- let expected_optimized = ["SortPreservingMergeExec: [nullable_col@0 ASC,non_nullable_col@1 ASC]", - " SortExec: expr=[nullable_col@0 ASC,non_nullable_col@1 ASC], preserve_partitioning=[true]", + let expected_optimized = ["SortPreservingMergeExec: [nullable_col@0 ASC, non_nullable_col@1 ASC]", + " SortExec: expr=[nullable_col@0 ASC, non_nullable_col@1 ASC], preserve_partitioning=[true]", " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=2", " UnionExec", " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC]", " GlobalLimitExec: skip=0, fetch=100", " LocalLimitExec: fetch=100", - " SortExec: expr=[nullable_col@0 ASC,non_nullable_col@1 ASC], preserve_partitioning=[false]", + " SortExec: expr=[nullable_col@0 ASC, non_nullable_col@1 ASC], preserve_partitioning=[false]", " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]"]; assert_optimized!(expected_input, expected_optimized, physical_plan, true); @@ -1288,12 +1296,12 @@ mod tests { let sort = sort_exec(vec![sort_exprs[0].clone()], source); let physical_plan = sort_preserving_merge_exec(sort_exprs, sort); let expected_input = [ - "SortPreservingMergeExec: [nullable_col@0 ASC,non_nullable_col@1 ASC]", + "SortPreservingMergeExec: [nullable_col@0 ASC, non_nullable_col@1 ASC]", " SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]", " MemoryExec: partitions=1, partition_sizes=[0]", ]; let expected_optimized = [ - "SortExec: expr=[nullable_col@0 ASC,non_nullable_col@1 ASC], preserve_partitioning=[false]", + "SortExec: expr=[nullable_col@0 ASC, non_nullable_col@1 ASC], preserve_partitioning=[false]", " MemoryExec: partitions=1, partition_sizes=[0]", ]; assert_optimized!(expected_input, expected_optimized, physical_plan, true); @@ -1317,7 +1325,7 @@ mod tests { let expected_input = [ "SortPreservingMergeExec: [non_nullable_col@1 ASC]", " SortExec: expr=[nullable_col@0 ASC], 
preserve_partitioning=[false]", - " SortPreservingMergeExec: [nullable_col@0 ASC,non_nullable_col@1 ASC]", + " SortPreservingMergeExec: [nullable_col@0 ASC, non_nullable_col@1 ASC]", " MemoryExec: partitions=1, partition_sizes=[0]", ]; let expected_optimized = [ @@ -1409,17 +1417,17 @@ mod tests { // Input is an invalid plan. In this case rule should add required sorting in appropriate places. // First ParquetExec has output ordering(nullable_col@0 ASC). However, it doesn't satisfy the // required ordering of SortPreservingMergeExec. - let expected_input = ["SortPreservingMergeExec: [nullable_col@0 ASC,non_nullable_col@1 ASC]", + let expected_input = ["SortPreservingMergeExec: [nullable_col@0 ASC, non_nullable_col@1 ASC]", " UnionExec", " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC]", - " SortExec: expr=[nullable_col@0 ASC,non_nullable_col@1 ASC], preserve_partitioning=[false]", + " SortExec: expr=[nullable_col@0 ASC, non_nullable_col@1 ASC], preserve_partitioning=[false]", " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]"]; - let expected_optimized = ["SortPreservingMergeExec: [nullable_col@0 ASC,non_nullable_col@1 ASC]", + let expected_optimized = ["SortPreservingMergeExec: [nullable_col@0 ASC, non_nullable_col@1 ASC]", " UnionExec", - " SortExec: expr=[nullable_col@0 ASC,non_nullable_col@1 ASC], preserve_partitioning=[false]", + " SortExec: expr=[nullable_col@0 ASC, non_nullable_col@1 ASC], preserve_partitioning=[false]", " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC]", - " SortExec: expr=[nullable_col@0 ASC,non_nullable_col@1 ASC], preserve_partitioning=[false]", + " SortExec: expr=[nullable_col@0 ASC, non_nullable_col@1 ASC], preserve_partitioning=[false]", " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]"]; 
assert_optimized!(expected_input, expected_optimized, physical_plan, true); @@ -1450,7 +1458,7 @@ mod tests { // Third input to the union is not Sorted (SortExec is matches required ordering by the SortPreservingMergeExec above). let expected_input = ["SortPreservingMergeExec: [nullable_col@0 ASC]", " UnionExec", - " SortExec: expr=[nullable_col@0 ASC,non_nullable_col@1 ASC], preserve_partitioning=[false]", + " SortExec: expr=[nullable_col@0 ASC, non_nullable_col@1 ASC], preserve_partitioning=[false]", " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]", " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC]", " SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]", @@ -1490,20 +1498,20 @@ mod tests { // Should modify the plan to ensure that all three inputs to the // `UnionExec` satisfy the ordering, OR add a single sort after // the `UnionExec` (both of which are equally good for this example). 
- let expected_input = ["SortPreservingMergeExec: [nullable_col@0 ASC,non_nullable_col@1 ASC]", + let expected_input = ["SortPreservingMergeExec: [nullable_col@0 ASC, non_nullable_col@1 ASC]", " UnionExec", " SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]", " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]", " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC]", " SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]", " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]"]; - let expected_optimized = ["SortPreservingMergeExec: [nullable_col@0 ASC,non_nullable_col@1 ASC]", + let expected_optimized = ["SortPreservingMergeExec: [nullable_col@0 ASC, non_nullable_col@1 ASC]", " UnionExec", - " SortExec: expr=[nullable_col@0 ASC,non_nullable_col@1 ASC], preserve_partitioning=[false]", + " SortExec: expr=[nullable_col@0 ASC, non_nullable_col@1 ASC], preserve_partitioning=[false]", " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]", - " SortExec: expr=[nullable_col@0 ASC,non_nullable_col@1 ASC], preserve_partitioning=[false]", + " SortExec: expr=[nullable_col@0 ASC, non_nullable_col@1 ASC], preserve_partitioning=[false]", " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC]", - " SortExec: expr=[nullable_col@0 ASC,non_nullable_col@1 ASC], preserve_partitioning=[false]", + " SortExec: expr=[nullable_col@0 ASC, non_nullable_col@1 ASC], preserve_partitioning=[false]", " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]"]; assert_optimized!(expected_input, expected_optimized, physical_plan, true); @@ -1542,9 +1550,9 @@ mod tests { // fine `SortExec`s below with required `SortExec`s that are absolutely necessary. 
let expected_input = ["SortPreservingMergeExec: [nullable_col@0 ASC]", " UnionExec", - " SortExec: expr=[nullable_col@0 ASC,non_nullable_col@1 ASC], preserve_partitioning=[false]", + " SortExec: expr=[nullable_col@0 ASC, non_nullable_col@1 ASC], preserve_partitioning=[false]", " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]", - " SortExec: expr=[nullable_col@0 ASC,non_nullable_col@1 DESC NULLS LAST], preserve_partitioning=[false]", + " SortExec: expr=[nullable_col@0 ASC, non_nullable_col@1 DESC NULLS LAST], preserve_partitioning=[false]", " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]"]; let expected_optimized = ["SortPreservingMergeExec: [nullable_col@0 ASC]", " UnionExec", @@ -1588,7 +1596,7 @@ mod tests { " SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]", " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]", " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC]", - " SortPreservingMergeExec: [nullable_col@0 ASC,non_nullable_col@1 ASC]", + " SortPreservingMergeExec: [nullable_col@0 ASC, non_nullable_col@1 ASC]", " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]"]; // Should adjust the requirement in the third input of the union so @@ -1625,9 +1633,9 @@ mod tests { // Union has unnecessarily fine ordering below it. We should be able to replace them with absolutely necessary ordering. 
let expected_input = ["SortPreservingMergeExec: [nullable_col@0 ASC]", " UnionExec", - " SortExec: expr=[nullable_col@0 ASC,non_nullable_col@1 ASC], preserve_partitioning=[false]", + " SortExec: expr=[nullable_col@0 ASC, non_nullable_col@1 ASC], preserve_partitioning=[false]", " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]", - " SortExec: expr=[nullable_col@0 ASC,non_nullable_col@1 ASC], preserve_partitioning=[false]", + " SortExec: expr=[nullable_col@0 ASC, non_nullable_col@1 ASC], preserve_partitioning=[false]", " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]"]; // Union preserves the inputs ordering and we should not change any of the SortExecs under UnionExec let expected_output = ["SortPreservingMergeExec: [nullable_col@0 ASC]", @@ -1676,9 +1684,9 @@ mod tests { // The `UnionExec` doesn't preserve any of the inputs ordering in the // example below. let expected_input = ["UnionExec", - " SortExec: expr=[nullable_col@0 ASC,non_nullable_col@1 ASC], preserve_partitioning=[false]", + " SortExec: expr=[nullable_col@0 ASC, non_nullable_col@1 ASC], preserve_partitioning=[false]", " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]", - " SortExec: expr=[nullable_col@0 DESC NULLS LAST,non_nullable_col@1 DESC NULLS LAST], preserve_partitioning=[false]", + " SortExec: expr=[nullable_col@0 DESC NULLS LAST, non_nullable_col@1 DESC NULLS LAST], preserve_partitioning=[false]", " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]"]; // Since `UnionExec` doesn't preserve ordering in the plan above. // We shouldn't keep SortExecs in the plan. 
@@ -1744,10 +1752,10 @@ mod tests { async fn test_window_multi_path_sort2() -> Result<()> { let schema = create_test_schema()?; - let sort_exprs1 = vec![ + let sort_exprs1 = LexOrdering::new(vec![ sort_expr("nullable_col", &schema), sort_expr("non_nullable_col", &schema), - ]; + ]); let sort_exprs2 = vec![sort_expr("nullable_col", &schema)]; let source1 = parquet_exec_sorted(&schema, sort_exprs2.clone()); let source2 = parquet_exec_sorted(&schema, sort_exprs2.clone()); @@ -1761,11 +1769,11 @@ mod tests { // The `WindowAggExec` can get its required sorting from the leaf nodes directly. // The unnecessary SortExecs should be removed let expected_input = ["BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(NULL), end_bound: CurrentRow, is_causal: false }], mode=[Sorted]", - " SortPreservingMergeExec: [nullable_col@0 ASC,non_nullable_col@1 ASC]", + " SortPreservingMergeExec: [nullable_col@0 ASC, non_nullable_col@1 ASC]", " UnionExec", - " SortExec: expr=[nullable_col@0 ASC,non_nullable_col@1 ASC], preserve_partitioning=[false]", + " SortExec: expr=[nullable_col@0 ASC, non_nullable_col@1 ASC], preserve_partitioning=[false]", " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC]", - " SortExec: expr=[nullable_col@0 ASC,non_nullable_col@1 ASC], preserve_partitioning=[false]", + " SortExec: expr=[nullable_col@0 ASC, non_nullable_col@1 ASC], preserve_partitioning=[false]", " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC]"]; let expected_optimized = ["BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(NULL), end_bound: 
CurrentRow, is_causal: false }], mode=[Sorted]", " SortPreservingMergeExec: [nullable_col@0 ASC]", @@ -1810,11 +1818,11 @@ mod tests { // Should not change the unnecessarily fine `SortExec`s because there is `LimitExec` let expected_input = ["SortPreservingMergeExec: [nullable_col@0 ASC]", " UnionExec", - " SortExec: expr=[nullable_col@0 ASC,non_nullable_col@1 ASC], preserve_partitioning=[false]", + " SortExec: expr=[nullable_col@0 ASC, non_nullable_col@1 ASC], preserve_partitioning=[false]", " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]", " GlobalLimitExec: skip=0, fetch=100", " LocalLimitExec: fetch=100", - " SortExec: expr=[nullable_col@0 ASC,non_nullable_col@1 DESC NULLS LAST], preserve_partitioning=[false]", + " SortExec: expr=[nullable_col@0 ASC, non_nullable_col@1 DESC NULLS LAST], preserve_partitioning=[false]", " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]"]; let expected_optimized = ["SortPreservingMergeExec: [nullable_col@0 ASC]", " UnionExec", @@ -1822,7 +1830,7 @@ mod tests { " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]", " GlobalLimitExec: skip=0, fetch=100", " LocalLimitExec: fetch=100", - " SortExec: expr=[nullable_col@0 ASC,non_nullable_col@1 DESC NULLS LAST], preserve_partitioning=[false]", + " SortExec: expr=[nullable_col@0 ASC, non_nullable_col@1 DESC NULLS LAST], preserve_partitioning=[false]", " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]"]; assert_optimized!(expected_input, expected_optimized, physical_plan, true); @@ -1867,7 +1875,7 @@ mod tests { let join_plan2 = format!( " SortMergeJoin: join_type={join_type}, on=[(nullable_col@0, col_a@0)]" ); - let expected_input = ["SortPreservingMergeExec: [nullable_col@0 ASC,non_nullable_col@1 ASC]", + let expected_input = ["SortPreservingMergeExec: [nullable_col@0 ASC, non_nullable_col@1 ASC]", join_plan2.as_str(), " ParquetExec: 
file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]", " ParquetExec: file_groups={1 group: [[x]]}, projection=[col_a, col_b]"]; @@ -1879,7 +1887,7 @@ mod tests { // can push down the sort requirements and save 1 SortExec vec![ join_plan.as_str(), - " SortExec: expr=[nullable_col@0 ASC,non_nullable_col@1 ASC], preserve_partitioning=[false]", + " SortExec: expr=[nullable_col@0 ASC, non_nullable_col@1 ASC], preserve_partitioning=[false]", " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]", " SortExec: expr=[col_a@0 ASC], preserve_partitioning=[false]", " ParquetExec: file_groups={1 group: [[x]]}, projection=[col_a, col_b]", @@ -1888,7 +1896,7 @@ mod tests { _ => { // can not push down the sort requirements vec![ - "SortExec: expr=[nullable_col@0 ASC,non_nullable_col@1 ASC], preserve_partitioning=[false]", + "SortExec: expr=[nullable_col@0 ASC, non_nullable_col@1 ASC], preserve_partitioning=[false]", join_plan2.as_str(), " SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]", " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]", @@ -1938,9 +1946,9 @@ mod tests { ); let spm_plan = match join_type { JoinType::RightAnti => { - "SortPreservingMergeExec: [col_a@0 ASC,col_b@1 ASC]" + "SortPreservingMergeExec: [col_a@0 ASC, col_b@1 ASC]" } - _ => "SortPreservingMergeExec: [col_a@2 ASC,col_b@3 ASC]", + _ => "SortPreservingMergeExec: [col_a@2 ASC, col_b@3 ASC]", }; let join_plan2 = format!( " SortMergeJoin: join_type={join_type}, on=[(nullable_col@0, col_a@0)]" @@ -1956,14 +1964,14 @@ mod tests { join_plan.as_str(), " SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]", " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]", - " SortExec: expr=[col_a@0 ASC,col_b@1 ASC], preserve_partitioning=[false]", + " SortExec: expr=[col_a@0 ASC, col_b@1 ASC], preserve_partitioning=[false]", " ParquetExec: file_groups={1 group: 
[[x]]}, projection=[col_a, col_b]", ] } _ => { // can not push down the sort requirements for Left and Full join. vec![ - "SortExec: expr=[col_a@2 ASC,col_b@3 ASC], preserve_partitioning=[false]", + "SortExec: expr=[col_a@2 ASC, col_b@3 ASC], preserve_partitioning=[false]", join_plan2.as_str(), " SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]", " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]", @@ -2001,13 +2009,13 @@ mod tests { ]; let physical_plan = sort_preserving_merge_exec(sort_exprs1, join.clone()); - let expected_input = ["SortPreservingMergeExec: [col_b@3 ASC,col_a@2 ASC]", + let expected_input = ["SortPreservingMergeExec: [col_b@3 ASC, col_a@2 ASC]", " SortMergeJoin: join_type=Inner, on=[(nullable_col@0, col_a@0)]", " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]", " ParquetExec: file_groups={1 group: [[x]]}, projection=[col_a, col_b]"]; // can not push down the sort requirements, need to add SortExec - let expected_optimized = ["SortExec: expr=[col_b@3 ASC,col_a@2 ASC], preserve_partitioning=[false]", + let expected_optimized = ["SortExec: expr=[col_b@3 ASC, col_a@2 ASC], preserve_partitioning=[false]", " SortMergeJoin: join_type=Inner, on=[(nullable_col@0, col_a@0)]", " SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]", " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]", @@ -2023,13 +2031,13 @@ mod tests { ]; let physical_plan = sort_preserving_merge_exec(sort_exprs2, join); - let expected_input = ["SortPreservingMergeExec: [nullable_col@0 ASC,col_b@3 ASC,col_a@2 ASC]", + let expected_input = ["SortPreservingMergeExec: [nullable_col@0 ASC, col_b@3 ASC, col_a@2 ASC]", " SortMergeJoin: join_type=Inner, on=[(nullable_col@0, col_a@0)]", " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]", " ParquetExec: file_groups={1 group: [[x]]}, projection=[col_a, col_b]"]; // 
can not push down the sort requirements, need to add SortExec - let expected_optimized = ["SortExec: expr=[nullable_col@0 ASC,col_b@3 ASC,col_a@2 ASC], preserve_partitioning=[false]", + let expected_optimized = ["SortExec: expr=[nullable_col@0 ASC, col_b@3 ASC, col_a@2 ASC], preserve_partitioning=[false]", " SortMergeJoin: join_type=Inner, on=[(nullable_col@0, col_a@0)]", " SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]", " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]", @@ -2069,7 +2077,7 @@ mod tests { let expected_optimized = ["BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(NULL), end_bound: CurrentRow, is_causal: false }], mode=[Sorted]", " BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(NULL), end_bound: CurrentRow, is_causal: false }], mode=[Sorted]", " BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(NULL), end_bound: CurrentRow, is_causal: false }], mode=[Sorted]", - " SortExec: expr=[nullable_col@0 ASC,non_nullable_col@1 ASC], preserve_partitioning=[false]", + " SortExec: expr=[nullable_col@0 ASC, non_nullable_col@1 ASC], preserve_partitioning=[false]", " MemoryExec: partitions=1, partition_sizes=[0]"]; assert_optimized!(expected_input, expected_optimized, physical_plan, true); @@ -2124,7 +2132,7 @@ mod tests { let state = session_ctx.state(); let memory_exec = memory_exec(&schema); - let sort_exprs = vec![sort_expr("nullable_col", &schema)]; + let sort_exprs = LexOrdering::new(vec![sort_expr("nullable_col", &schema)]); let 
window = bounded_window_exec("nullable_col", sort_exprs.clone(), memory_exec); let repartition = repartition_exec(window); @@ -2174,7 +2182,7 @@ mod tests { let repartition = repartition_exec(source); let coalesce_partitions = Arc::new(CoalescePartitionsExec::new(repartition)); let repartition = repartition_exec(coalesce_partitions); - let sort_exprs = vec![sort_expr("nullable_col", &schema)]; + let sort_exprs = LexOrdering::new(vec![sort_expr("nullable_col", &schema)]); // Add local sort let sort = Arc::new( SortExec::new(sort_exprs.clone(), repartition) @@ -2332,11 +2340,11 @@ mod tests { let physical_plan = sort_exec(vec![sort_expr("b", &schema)], spm); let expected_input = ["SortExec: expr=[b@1 ASC], preserve_partitioning=[false]", - " SortPreservingMergeExec: [a@0 ASC,b@1 ASC]", + " SortPreservingMergeExec: [a@0 ASC, b@1 ASC]", " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", " CsvExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[a@0 ASC, b@1 ASC], has_header=false",]; let expected_optimized = ["SortExec: expr=[b@1 ASC], preserve_partitioning=[false]", - " SortPreservingMergeExec: [a@0 ASC,b@1 ASC]", + " SortPreservingMergeExec: [a@0 ASC, b@1 ASC]", " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", " CsvExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[a@0 ASC, b@1 ASC], has_header=false",]; assert_optimized!(expected_input, expected_optimized, physical_plan, false); @@ -2360,12 +2368,12 @@ mod tests { spm, ); - let expected_input = ["SortExec: expr=[a@0 ASC,b@1 ASC,c@2 ASC], preserve_partitioning=[false]", - " SortPreservingMergeExec: [a@0 ASC,b@1 ASC]", + let expected_input = ["SortExec: expr=[a@0 ASC, b@1 ASC, c@2 ASC], preserve_partitioning=[false]", + " SortPreservingMergeExec: [a@0 ASC, b@1 ASC]", " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", " CsvExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], 
output_ordering=[a@0 ASC, b@1 ASC], has_header=false",]; - let expected_optimized = ["SortPreservingMergeExec: [a@0 ASC,b@1 ASC]", - " SortExec: expr=[a@0 ASC,b@1 ASC,c@2 ASC], preserve_partitioning=[true]", + let expected_optimized = ["SortPreservingMergeExec: [a@0 ASC, b@1 ASC]", + " SortExec: expr=[a@0 ASC, b@1 ASC, c@2 ASC], preserve_partitioning=[true]", " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", " CsvExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[a@0 ASC, b@1 ASC], has_header=false",]; assert_optimized!(expected_input, expected_optimized, physical_plan, false); @@ -2387,15 +2395,15 @@ mod tests { let expected_input = [ "BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(NULL), end_bound: CurrentRow, is_causal: false }], mode=[Sorted]", - " SortPreservingMergeExec: [a@0 ASC,b@1 ASC]", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=10, preserve_order=true, sort_exprs=a@0 ASC,b@1 ASC", + " SortPreservingMergeExec: [a@0 ASC, b@1 ASC]", + " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=10, preserve_order=true, sort_exprs=a@0 ASC, b@1 ASC", " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " SortExec: expr=[a@0 ASC,b@1 ASC], preserve_partitioning=[false]", + " SortExec: expr=[a@0 ASC, b@1 ASC], preserve_partitioning=[false]", " CsvExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], has_header=false", ]; let expected_optimized = [ "BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(NULL), end_bound: CurrentRow, is_causal: false }], mode=[Sorted]", - " SortExec: expr=[a@0 ASC,b@1 ASC], 
preserve_partitioning=[false]", + " SortExec: expr=[a@0 ASC, b@1 ASC], preserve_partitioning=[false]", " CoalescePartitionsExec", " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=10", " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", @@ -2418,11 +2426,11 @@ mod tests { ); let expected_input = [ - "SortExec: expr=[a@0 ASC,c@2 ASC], preserve_partitioning=[false]", + "SortExec: expr=[a@0 ASC, c@2 ASC], preserve_partitioning=[false]", " StreamingTableExec: partition_sizes=1, projection=[a, b, c, d, e], infinite_source=true, output_ordering=[a@0 ASC]" ]; let expected_optimized = [ - "PartialSortExec: expr=[a@0 ASC,c@2 ASC], common_prefix_length=[1]", + "PartialSortExec: expr=[a@0 ASC, c@2 ASC], common_prefix_length=[1]", " StreamingTableExec: partition_sizes=1, projection=[a, b, c, d, e], infinite_source=true, output_ordering=[a@0 ASC]", ]; assert_optimized!(expected_input, expected_optimized, physical_plan, true); @@ -2445,12 +2453,12 @@ mod tests { ); let expected_input = [ - "SortExec: expr=[a@0 ASC,c@2 ASC,d@3 ASC], preserve_partitioning=[false]", + "SortExec: expr=[a@0 ASC, c@2 ASC, d@3 ASC], preserve_partitioning=[false]", " StreamingTableExec: partition_sizes=1, projection=[a, b, c, d, e], infinite_source=true, output_ordering=[a@0 ASC, c@2 ASC]" ]; // let optimized let expected_optimized = [ - "PartialSortExec: expr=[a@0 ASC,c@2 ASC,d@3 ASC], common_prefix_length=[2]", + "PartialSortExec: expr=[a@0 ASC, c@2 ASC, d@3 ASC], common_prefix_length=[2]", " StreamingTableExec: partition_sizes=1, projection=[a, b, c, d, e], infinite_source=true, output_ordering=[a@0 ASC, c@2 ASC]", ]; assert_optimized!(expected_input, expected_optimized, physical_plan, true); @@ -2472,7 +2480,7 @@ mod tests { parquet_input, ); let expected_input = [ - "SortExec: expr=[a@0 ASC,b@1 ASC,c@2 ASC], preserve_partitioning=[false]", + "SortExec: expr=[a@0 ASC, b@1 ASC, c@2 ASC], preserve_partitioning=[false]", " ParquetExec: file_groups={1 group: 
[[x]]}, projection=[a, b, c, d, e], output_ordering=[b@1 ASC, c@2 ASC]" ]; let expected_no_change = expected_input; @@ -2495,7 +2503,7 @@ mod tests { unbounded_input, ); let expected_input = [ - "SortExec: expr=[a@0 ASC,b@1 ASC,c@2 ASC], preserve_partitioning=[false]", + "SortExec: expr=[a@0 ASC, b@1 ASC, c@2 ASC], preserve_partitioning=[false]", " StreamingTableExec: partition_sizes=1, projection=[a, b, c, d, e], infinite_source=true, output_ordering=[b@1 ASC, c@2 ASC]" ]; let expected_no_change = expected_input; @@ -2510,8 +2518,8 @@ mod tests { // SortExec: expr=[a] // MemoryExec let schema = create_test_schema3()?; - let sort_exprs_a = vec![sort_expr("a", &schema)]; - let sort_exprs_b = vec![sort_expr("b", &schema)]; + let sort_exprs_a = LexOrdering::new(vec![sort_expr("a", &schema)]); + let sort_exprs_b = LexOrdering::new(vec![sort_expr("b", &schema)]); let plan = memory_exec(&schema); let plan = sort_exec(sort_exprs_a.clone(), plan); let plan = RequirementsTestExec::new(plan) @@ -2540,8 +2548,9 @@ mod tests { // SortExec: expr=[a] // MemoryExec let schema = create_test_schema3()?; - let sort_exprs_a = vec![sort_expr("a", &schema)]; - let sort_exprs_ab = vec![sort_expr("a", &schema), sort_expr("b", &schema)]; + let sort_exprs_a = LexOrdering::new(vec![sort_expr("a", &schema)]); + let sort_exprs_ab = + LexOrdering::new(vec![sort_expr("a", &schema), sort_expr("b", &schema)]); let plan = memory_exec(&schema); let plan = sort_exec(sort_exprs_a.clone(), plan); let plan = RequirementsTestExec::new(plan) @@ -2551,7 +2560,7 @@ mod tests { let plan = sort_exec(sort_exprs_ab, plan); let expected_input = [ - "SortExec: expr=[a@0 ASC,b@1 ASC], preserve_partitioning=[false]", + "SortExec: expr=[a@0 ASC, b@1 ASC], preserve_partitioning=[false]", " RequiredInputOrderingExec", " SortExec: expr=[a@0 ASC], preserve_partitioning=[false]", " MemoryExec: partitions=1, partition_sizes=[0]", @@ -2559,7 +2568,7 @@ mod tests { // should able to push shorts let expected = [ 
"RequiredInputOrderingExec", - " SortExec: expr=[a@0 ASC,b@1 ASC], preserve_partitioning=[false]", + " SortExec: expr=[a@0 ASC, b@1 ASC], preserve_partitioning=[false]", " MemoryExec: partitions=1, partition_sizes=[0]", ]; assert_optimized!(expected_input, expected, plan, true); diff --git a/datafusion/core/src/physical_optimizer/join_selection.rs b/datafusion/core/src/physical_optimizer/join_selection.rs index 1c63df1f0281..0312e362afb1 100644 --- a/datafusion/core/src/physical_optimizer/join_selection.rs +++ b/datafusion/core/src/physical_optimizer/join_selection.rs @@ -40,7 +40,8 @@ use datafusion_common::tree_node::{Transformed, TransformedResult, TreeNode}; use datafusion_common::{internal_err, JoinSide, JoinType}; use datafusion_expr::sort_properties::SortProperties; use datafusion_physical_expr::expressions::Column; -use datafusion_physical_expr::{PhysicalExpr, PhysicalSortExpr}; +use datafusion_physical_expr::PhysicalExpr; +use datafusion_physical_expr_common::sort_expr::LexOrdering; use datafusion_physical_optimizer::PhysicalOptimizerRule; /// The [`JoinSelection`] rule tries to modify a given plan so that it can @@ -132,6 +133,9 @@ fn swap_join_type(join_type: JoinType) -> JoinType { JoinType::RightSemi => JoinType::LeftSemi, JoinType::LeftAnti => JoinType::RightAnti, JoinType::RightAnti => JoinType::LeftAnti, + JoinType::LeftMark => { + unreachable!("LeftMark join type does not support swapping") + } } } @@ -550,7 +554,7 @@ fn hash_join_convert_symmetric_subrule( // the function concludes that no specific order is required for the SymmetricHashJoinExec. This approach // ensures that the symmetric hash join operation only imposes ordering constraints when necessary, // based on the properties of the child nodes and the filter condition. 
- let determine_order = |side: JoinSide| -> Option> { + let determine_order = |side: JoinSide| -> Option { hash_join .filter() .map(|filter| { @@ -573,6 +577,7 @@ fn hash_join_convert_symmetric_subrule( hash_join.right().equivalence_properties(), hash_join.right().schema(), ), + JoinSide::None => return false, }; let name = schema.field(*index).name(); @@ -588,8 +593,9 @@ fn hash_join_convert_symmetric_subrule( match side { JoinSide::Left => hash_join.left().output_ordering(), JoinSide::Right => hash_join.right().output_ordering(), + JoinSide::None => unreachable!(), } - .map(|p| p.to_vec()) + .map(|p| LexOrdering::new(p.to_vec())) }) .flatten() }; @@ -719,7 +725,6 @@ fn apply_subrules( #[cfg(test)] mod tests_statistical { - use super::*; use crate::{ physical_plan::{displayable, ColumnStatistics, Statistics}, diff --git a/datafusion/core/src/physical_optimizer/projection_pushdown.rs b/datafusion/core/src/physical_optimizer/projection_pushdown.rs index b4dd0a995d5f..5aecf036ce18 100644 --- a/datafusion/core/src/physical_optimizer/projection_pushdown.rs +++ b/datafusion/core/src/physical_optimizer/projection_pushdown.rs @@ -54,7 +54,7 @@ use datafusion_physical_expr::{ use datafusion_physical_plan::streaming::StreamingTableExec; use datafusion_physical_plan::union::UnionExec; -use datafusion_physical_expr_common::sort_expr::LexRequirement; +use datafusion_physical_expr_common::sort_expr::{LexOrdering, LexRequirement}; use datafusion_physical_optimizer::PhysicalOptimizerRule; use itertools::Itertools; @@ -246,7 +246,7 @@ fn try_swapping_with_streaming_table( let mut lex_orderings = vec![]; for lex_ordering in streaming_table.projected_output_ordering().into_iter() { - let mut orderings = vec![]; + let mut orderings = LexOrdering::default(); for order in lex_ordering { let Some(new_ordering) = update_expr(&order.expr, projection.expr(), false)? 
else { @@ -467,7 +467,7 @@ fn try_swapping_with_sort( return Ok(None); } - let mut updated_exprs = vec![]; + let mut updated_exprs = LexOrdering::default(); for sort in sort.expr() { let Some(new_expr) = update_expr(&sort.expr, projection.expr(), false)? else { return Ok(None); @@ -497,7 +497,7 @@ fn try_swapping_with_sort_preserving_merge( return Ok(None); } - let mut updated_exprs = vec![]; + let mut updated_exprs = LexOrdering::default(); for sort in spm.expr() { let Some(updated_expr) = update_expr(&sort.expr, projection.expr(), false)? else { @@ -915,8 +915,14 @@ fn try_swapping_with_sym_hash_join( new_filter, sym_join.join_type(), sym_join.null_equals_null(), - sym_join.right().output_ordering().map(|p| p.to_vec()), - sym_join.left().output_ordering().map(|p| p.to_vec()), + sym_join + .right() + .output_ordering() + .map(|p| LexOrdering::new(p.to_vec())), + sym_join + .left() + .output_ordering() + .map(|p| LexOrdering::new(p.to_vec())), sym_join.partition_mode(), )?))) } @@ -1863,7 +1869,7 @@ mod tests { }) as _], Some(&vec![0_usize, 2, 4, 3]), vec![ - vec![ + LexOrdering::new(vec![ PhysicalSortExpr { expr: Arc::new(Column::new("e", 2)), options: SortOptions::default(), @@ -1872,11 +1878,11 @@ mod tests { expr: Arc::new(Column::new("a", 0)), options: SortOptions::default(), }, - ], - vec![PhysicalSortExpr { + ]), + LexOrdering::new(vec![PhysicalSortExpr { expr: Arc::new(Column::new("d", 3)), options: SortOptions::default(), - }], + }]), ] .into_iter(), true, @@ -1923,7 +1929,7 @@ mod tests { assert_eq!( result.projected_output_ordering().into_iter().collect_vec(), vec![ - vec![ + LexOrdering::new(vec![ PhysicalSortExpr { expr: Arc::new(Column::new("e", 1)), options: SortOptions::default(), @@ -1932,11 +1938,11 @@ mod tests { expr: Arc::new(Column::new("a", 2)), options: SortOptions::default(), }, - ], - vec![PhysicalSortExpr { + ]), + LexOrdering::new(vec![PhysicalSortExpr { expr: Arc::new(Column::new("d", 0)), options: SortOptions::default(), - }], + }]), ] 
); assert!(result.is_infinite()); @@ -2553,7 +2559,7 @@ mod tests { fn test_sort_after_projection() -> Result<()> { let csv = create_simple_csv_exec(); let sort_req: Arc = Arc::new(SortExec::new( - vec![ + LexOrdering::new(vec![ PhysicalSortExpr { expr: Arc::new(Column::new("b", 1)), options: SortOptions::default(), @@ -2566,7 +2572,7 @@ mod tests { )), options: SortOptions::default(), }, - ], + ]), csv.clone(), )); let projection: Arc = Arc::new(ProjectionExec::try_new( @@ -2581,7 +2587,7 @@ mod tests { let initial = get_plan_string(&projection); let expected_initial = [ "ProjectionExec: expr=[c@2 as c, a@0 as new_a, b@1 as b]", - " SortExec: expr=[b@1 ASC,c@2 + a@0 ASC], preserve_partitioning=[false]", + " SortExec: expr=[b@1 ASC, c@2 + a@0 ASC], preserve_partitioning=[false]", " CsvExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], has_header=false" ]; assert_eq!(initial, expected_initial); @@ -2590,7 +2596,7 @@ mod tests { ProjectionPushdown::new().optimize(projection, &ConfigOptions::new())?; let expected = [ - "SortExec: expr=[b@2 ASC,c@0 + new_a@1 ASC], preserve_partitioning=[false]", + "SortExec: expr=[b@2 ASC, c@0 + new_a@1 ASC], preserve_partitioning=[false]", " ProjectionExec: expr=[c@2 as c, a@0 as new_a, b@1 as b]", " CsvExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], has_header=false" ]; @@ -2603,7 +2609,7 @@ mod tests { fn test_sort_preserving_after_projection() -> Result<()> { let csv = create_simple_csv_exec(); let sort_req: Arc = Arc::new(SortPreservingMergeExec::new( - vec![ + LexOrdering::new(vec![ PhysicalSortExpr { expr: Arc::new(Column::new("b", 1)), options: SortOptions::default(), @@ -2616,7 +2622,7 @@ mod tests { )), options: SortOptions::default(), }, - ], + ]), csv.clone(), )); let projection: Arc = Arc::new(ProjectionExec::try_new( @@ -2631,7 +2637,7 @@ mod tests { let initial = get_plan_string(&projection); let expected_initial = [ "ProjectionExec: expr=[c@2 as c, a@0 as new_a, b@1 as b]", - " 
SortPreservingMergeExec: [b@1 ASC,c@2 + a@0 ASC]", + " SortPreservingMergeExec: [b@1 ASC, c@2 + a@0 ASC]", " CsvExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], has_header=false" ]; assert_eq!(initial, expected_initial); @@ -2640,7 +2646,7 @@ mod tests { ProjectionPushdown::new().optimize(projection, &ConfigOptions::new())?; let expected = [ - "SortPreservingMergeExec: [b@2 ASC,c@0 + new_a@1 ASC]", + "SortPreservingMergeExec: [b@2 ASC, c@0 + new_a@1 ASC]", " ProjectionExec: expr=[c@2 as c, a@0 as new_a, b@1 as b]", " CsvExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], has_header=false" ]; diff --git a/datafusion/core/src/physical_optimizer/replace_with_order_preserving_variants.rs b/datafusion/core/src/physical_optimizer/replace_with_order_preserving_variants.rs index a989be987d3d..930ce52e6fa2 100644 --- a/datafusion/core/src/physical_optimizer/replace_with_order_preserving_variants.rs +++ b/datafusion/core/src/physical_optimizer/replace_with_order_preserving_variants.rs @@ -33,6 +33,7 @@ use datafusion_physical_plan::coalesce_partitions::CoalescePartitionsExec; use datafusion_physical_plan::tree_node::PlanContext; use datafusion_physical_plan::ExecutionPlanProperties; +use datafusion_physical_expr_common::sort_expr::LexOrdering; use itertools::izip; /// For a given `plan`, this object carries the information one needs from its @@ -131,7 +132,8 @@ fn plan_with_order_preserving_variants( if let Some(ordering) = child.output_ordering().map(Vec::from) { // When the input of a `CoalescePartitionsExec` has an ordering, // replace it with a `SortPreservingMergeExec` if appropriate: - let spm = SortPreservingMergeExec::new(ordering, child.clone()); + let spm = + SortPreservingMergeExec::new(LexOrdering::new(ordering), child.clone()); sort_input.plan = Arc::new(spm) as _; sort_input.children[0].data = true; return Ok(sort_input); @@ -255,7 +257,7 @@ pub(crate) fn replace_with_order_preserving_variants( if alternate_plan .plan 
.equivalence_properties() - .ordering_satisfy(requirements.plan.output_ordering().unwrap_or(&[])) + .ordering_satisfy(requirements.plan.output_ordering().unwrap_or_default()) { for child in alternate_plan.children.iter_mut() { child.data = false; diff --git a/datafusion/core/src/physical_optimizer/sort_pushdown.rs b/datafusion/core/src/physical_optimizer/sort_pushdown.rs index c7677d725b03..9eb200f534db 100644 --- a/datafusion/core/src/physical_optimizer/sort_pushdown.rs +++ b/datafusion/core/src/physical_optimizer/sort_pushdown.rs @@ -36,10 +36,10 @@ use datafusion_common::{plan_err, JoinSide, Result}; use datafusion_expr::JoinType; use datafusion_physical_expr::expressions::Column; use datafusion_physical_expr::utils::collect_columns; -use datafusion_physical_expr::{ - LexRequirementRef, PhysicalSortExpr, PhysicalSortRequirement, +use datafusion_physical_expr::{LexRequirementRef, PhysicalSortRequirement}; +use datafusion_physical_expr_common::sort_expr::{ + LexOrdering, LexOrderingRef, LexRequirement, }; -use datafusion_physical_expr_common::sort_expr::LexRequirement; use hashbrown::HashSet; @@ -235,7 +235,7 @@ fn pushdown_requirement_to_children( Some(JoinSide::Left) => try_pushdown_requirements_to_join( smj, parent_required, - &parent_required_expr, + parent_required_expr.as_ref(), JoinSide::Left, ), Some(JoinSide::Right) => { @@ -248,7 +248,7 @@ fn pushdown_requirement_to_children( try_pushdown_requirements_to_join( smj, parent_required, - &new_right_required_expr, + new_right_required_expr.as_ref(), JoinSide::Right, ) } @@ -277,7 +277,7 @@ fn pushdown_requirement_to_children( spm_eqs = spm_eqs.with_reorder(new_ordering); // Do not push-down through SortPreservingMergeExec when // ordering requirement invalidates requirement of sort preserving merge exec. 
- if !spm_eqs.ordering_satisfy(plan.output_ordering().unwrap_or(&[])) { + if !spm_eqs.ordering_satisfy(plan.output_ordering().unwrap_or_default()) { Ok(None) } else { // Can push-down through SortPreservingMergeExec, because parent requirement is finer @@ -344,10 +344,11 @@ fn determine_children_requirement( RequirementsCompatibility::NonCompatible } } + fn try_pushdown_requirements_to_join( smj: &SortMergeJoinExec, parent_required: LexRequirementRef, - sort_expr: &[PhysicalSortExpr], + sort_expr: LexOrderingRef, push_side: JoinSide, ) -> Result>>> { let left_eq_properties = smj.left().equivalence_properties(); @@ -355,13 +356,13 @@ fn try_pushdown_requirements_to_join( let mut smj_required_orderings = smj.required_input_ordering(); let right_requirement = smj_required_orderings.swap_remove(1); let left_requirement = smj_required_orderings.swap_remove(0); - let left_ordering = smj.left().output_ordering().unwrap_or(&[]); - let right_ordering = smj.right().output_ordering().unwrap_or(&[]); + let left_ordering = smj.left().output_ordering().unwrap_or_default(); + let right_ordering = smj.right().output_ordering().unwrap_or_default(); let (new_left_ordering, new_right_ordering) = match push_side { JoinSide::Left => { let left_eq_properties = left_eq_properties .clone() - .with_reorder(Vec::from(sort_expr)); + .with_reorder(LexOrdering::from_ref(sort_expr)); if left_eq_properties .ordering_satisfy_requirement(&left_requirement.unwrap_or_default()) { @@ -374,7 +375,7 @@ fn try_pushdown_requirements_to_join( JoinSide::Right => { let right_eq_properties = right_eq_properties .clone() - .with_reorder(Vec::from(sort_expr)); + .with_reorder(LexOrdering::from_ref(sort_expr)); if right_eq_properties .ordering_satisfy_requirement(&right_requirement.unwrap_or_default()) { @@ -384,6 +385,7 @@ fn try_pushdown_requirements_to_join( return Ok(None); } } + JoinSide::None => return Ok(None), }; let join_type = smj.join_type(); let probe_side = 
SortMergeJoinExec::probe_side(&join_type); @@ -410,18 +412,23 @@ fn try_pushdown_requirements_to_join( JoinSide::Right => { required_input_ordering[1] = new_req; } + JoinSide::None => unreachable!(), } required_input_ordering })) } fn expr_source_side( - required_exprs: &[PhysicalSortExpr], + required_exprs: LexOrderingRef, join_type: JoinType, left_columns_len: usize, ) -> Option { match join_type { - JoinType::Inner | JoinType::Left | JoinType::Right | JoinType::Full => { + JoinType::Inner + | JoinType::Left + | JoinType::Right + | JoinType::Full + | JoinType::LeftMark => { let all_column_sides = required_exprs .iter() .filter_map(|r| { diff --git a/datafusion/core/src/physical_optimizer/test_utils.rs b/datafusion/core/src/physical_optimizer/test_utils.rs index 98f1a7c21a39..bdf16300ea87 100644 --- a/datafusion/core/src/physical_optimizer/test_utils.rs +++ b/datafusion/core/src/physical_optimizer/test_utils.rs @@ -57,7 +57,7 @@ use datafusion_physical_plan::{ use async_trait::async_trait; use datafusion_execution::{SendableRecordBatchStream, TaskContext}; use datafusion_physical_expr_common::sort_expr::{ - LexRequirement, PhysicalSortRequirement, + LexOrdering, LexRequirement, PhysicalSortRequirement, }; async fn register_current_csv( @@ -243,7 +243,7 @@ pub fn bounded_window_exec( sort_exprs: impl IntoIterator, input: Arc, ) -> Arc { - let sort_exprs: Vec<_> = sort_exprs.into_iter().collect(); + let sort_exprs: LexOrdering = sort_exprs.into_iter().collect(); let schema = input.schema(); Arc::new( @@ -253,7 +253,7 @@ pub fn bounded_window_exec( "count".to_owned(), &[col(col_name, &schema).unwrap()], &[], - &sort_exprs, + sort_exprs.as_ref(), Arc::new(WindowFrame::new(Some(false))), schema.as_ref(), false, @@ -364,7 +364,7 @@ pub fn sort_exec( /// A test [`ExecutionPlan`] whose requirements can be configured. 
#[derive(Debug)] pub struct RequirementsTestExec { - required_input_ordering: Vec, + required_input_ordering: LexOrdering, maintains_input_order: bool, input: Arc, } @@ -372,7 +372,7 @@ pub struct RequirementsTestExec { impl RequirementsTestExec { pub fn new(input: Arc) -> Self { Self { - required_input_ordering: vec![], + required_input_ordering: LexOrdering::default(), maintains_input_order: true, input, } @@ -381,7 +381,7 @@ impl RequirementsTestExec { /// sets the required input ordering pub fn with_required_input_ordering( mut self, - required_input_ordering: Vec, + required_input_ordering: LexOrdering, ) -> Self { self.required_input_ordering = required_input_ordering; self @@ -419,8 +419,9 @@ impl ExecutionPlan for RequirementsTestExec { } fn required_input_ordering(&self) -> Vec> { - let requirement = - PhysicalSortRequirement::from_sort_exprs(&self.required_input_ordering); + let requirement = PhysicalSortRequirement::from_sort_exprs( + self.required_input_ordering.as_ref().iter(), + ); vec![Some(requirement)] } diff --git a/datafusion/core/src/physical_optimizer/update_aggr_exprs.rs b/datafusion/core/src/physical_optimizer/update_aggr_exprs.rs index 26cdd65883e4..d85278556cc4 100644 --- a/datafusion/core/src/physical_optimizer/update_aggr_exprs.rs +++ b/datafusion/core/src/physical_optimizer/update_aggr_exprs.rs @@ -138,12 +138,12 @@ fn try_convert_aggregate_if_better( aggr_exprs .into_iter() .map(|aggr_expr| { - let aggr_sort_exprs = aggr_expr.order_bys().unwrap_or(&[]); + let aggr_sort_exprs = &aggr_expr.order_bys().unwrap_or_default(); let reverse_aggr_sort_exprs = reverse_order_bys(aggr_sort_exprs); let aggr_sort_reqs = - PhysicalSortRequirement::from_sort_exprs(aggr_sort_exprs); + PhysicalSortRequirement::from_sort_exprs(aggr_sort_exprs.iter()); let reverse_aggr_req = - PhysicalSortRequirement::from_sort_exprs(&reverse_aggr_sort_exprs); + PhysicalSortRequirement::from_sort_exprs(&reverse_aggr_sort_exprs.inner); // If the aggregate expression benefits 
from input ordering, and // there is an actual ordering enabling this, try to update the diff --git a/datafusion/core/src/physical_optimizer/utils.rs b/datafusion/core/src/physical_optimizer/utils.rs index 2c0d042281e6..8007d8cc7f00 100644 --- a/datafusion/core/src/physical_optimizer/utils.rs +++ b/datafusion/core/src/physical_optimizer/utils.rs @@ -39,7 +39,7 @@ pub fn add_sort_above( fetch: Option, ) -> PlanContext { let mut sort_expr = PhysicalSortRequirement::to_sort_exprs(sort_requirements); - sort_expr.retain(|sort_expr| { + sort_expr.inner.retain(|sort_expr| { !node .plan .equivalence_properties() diff --git a/datafusion/core/src/physical_planner.rs b/datafusion/core/src/physical_planner.rs index ffedc2d6b6ef..2a96a2ad111f 100644 --- a/datafusion/core/src/physical_planner.rs +++ b/datafusion/core/src/physical_planner.rs @@ -649,14 +649,16 @@ impl DefaultPhysicalPlanner { aggr_expr, .. }) => { + let options = session_state.config().options(); // Initially need to perform the aggregate and then merge the partitions let input_exec = children.one()?; let physical_input_schema = input_exec.schema(); let logical_input_schema = input.as_ref().schema(); - let physical_input_schema_from_logical: Arc = - logical_input_schema.as_ref().clone().into(); + let physical_input_schema_from_logical = logical_input_schema.inner(); - if physical_input_schema != physical_input_schema_from_logical { + if &physical_input_schema != physical_input_schema_from_logical + && !options.execution.skip_physical_aggregate_schema_check + { return internal_err!("Physical input schema should be the same as the one converted from logical input schema."); } @@ -1199,6 +1201,9 @@ impl DefaultPhysicalPlanner { // statement can be prepared) return not_impl_err!("Unsupported logical plan: Prepare"); } + LogicalPlan::Execute(_) => { + return not_impl_err!("Unsupported logical plan: Execute"); + } LogicalPlan::Dml(dml) => { // DataFusion is a read-only query engine, but also a library, so consumers may 
implement this return not_impl_err!("Unsupported logical plan: Dml({0})", dml.op); @@ -1516,7 +1521,7 @@ pub fn create_window_expr_with_name( name, &physical_args, &partition_by, - &order_by, + order_by.as_ref(), window_frame, physical_schema, ignore_nulls, @@ -1545,7 +1550,7 @@ type AggregateExprWithOptionalArgs = ( // The filter clause, if any Option>, // Ordering requirements, if any - Option>, + Option, ); /// Create an aggregate expression with a name from a logical expression @@ -1595,12 +1600,12 @@ pub fn create_aggregate_expr_with_name_and_maybe_filter( None => None, }; - let ordering_reqs: Vec = - physical_sort_exprs.clone().unwrap_or(vec![]); + let ordering_reqs: LexOrdering = + physical_sort_exprs.clone().unwrap_or_default(); let agg_expr = AggregateExprBuilder::new(func.to_owned(), physical_args.to_vec()) - .order_by(ordering_reqs.to_vec()) + .order_by(ordering_reqs) .schema(Arc::new(physical_input_schema.to_owned())) .alias(name) .with_ignore_nulls(ignore_nulls) @@ -1669,7 +1674,7 @@ pub fn create_physical_sort_exprs( exprs .iter() .map(|expr| create_physical_sort_expr(expr, input_dfschema, execution_props)) - .collect::>>() + .collect::>() } impl DefaultPhysicalPlanner { diff --git a/datafusion/core/tests/dataframe/mod.rs b/datafusion/core/tests/dataframe/mod.rs index 0c3c2a99517e..439aa6147e9b 100644 --- a/datafusion/core/tests/dataframe/mod.rs +++ b/datafusion/core/tests/dataframe/mod.rs @@ -114,10 +114,7 @@ async fn test_count_wildcard_on_where_in() -> Result<()> { .await? .aggregate(vec![], vec![count(wildcard())])? .select(vec![count(wildcard())])? - .into_unoptimized_plan(), - // Usually, into_optimized_plan() should be used here, but due to - // https://github.com/apache/datafusion/issues/5771, - // subqueries in SQL cannot be optimized, resulting in differences in logical_plan. Therefore, into_unoptimized_plan() is temporarily used here. + .into_optimized_plan()?, ), ))? .select(vec![col("a"), col("b")])? 
diff --git a/datafusion/core/tests/fuzz_cases/aggregate_fuzz.rs b/datafusion/core/tests/fuzz_cases/aggregate_fuzz.rs index 28901b14b5b7..21f604e6c60f 100644 --- a/datafusion/core/tests/fuzz_cases/aggregate_fuzz.rs +++ b/datafusion/core/tests/fuzz_cases/aggregate_fuzz.rs @@ -39,15 +39,15 @@ use datafusion_physical_expr::PhysicalSortExpr; use datafusion_physical_plan::InputOrderMode; use test_utils::{add_empty_batches, StringBatchGenerator}; +use crate::fuzz_cases::aggregation_fuzzer::{ + AggregationFuzzerBuilder, ColumnDescr, DatasetGeneratorConfig, QueryBuilder, +}; +use datafusion_physical_expr_common::sort_expr::LexOrdering; use hashbrown::HashMap; use rand::rngs::StdRng; use rand::{Rng, SeedableRng}; use tokio::task::JoinSet; -use crate::fuzz_cases::aggregation_fuzzer::{ - AggregationFuzzerBuilder, ColumnDescr, DatasetGeneratorConfig, QueryBuilder, -}; - // ======================================================================== // The new aggregation fuzz tests based on [`AggregationFuzzer`] // ======================================================================== @@ -65,10 +65,6 @@ use crate::fuzz_cases::aggregation_fuzzer::{ // // TODO: test other aggregate functions // - AVG (unstable given the wide range of inputs) -// -// TODO: specific test for ordering (ensure all group by columns are ordered) -// Currently the data is sorted by random columns, so there are almost no -// repeated runs. 
To improve coverage we should also sort by lower cardinality columns #[tokio::test(flavor = "multi_thread")] async fn test_min() { let data_gen_config = baseline_config(); @@ -79,7 +75,7 @@ async fn test_min() { .with_aggregate_function("min") // min works on all column types .with_aggregate_arguments(data_gen_config.all_columns()) - .with_group_by_columns(data_gen_config.all_columns()); + .set_group_by_columns(data_gen_config.all_columns()); AggregationFuzzerBuilder::from(data_gen_config) .add_query_builder(query_builder) @@ -98,7 +94,7 @@ async fn test_max() { .with_aggregate_function("max") // max works on all column types .with_aggregate_arguments(data_gen_config.all_columns()) - .with_group_by_columns(data_gen_config.all_columns()); + .set_group_by_columns(data_gen_config.all_columns()); AggregationFuzzerBuilder::from(data_gen_config) .add_query_builder(query_builder) @@ -118,7 +114,7 @@ async fn test_sum() { .with_distinct_aggregate_function("sum") // sum only works on numeric columns .with_aggregate_arguments(data_gen_config.numeric_columns()) - .with_group_by_columns(data_gen_config.all_columns()); + .set_group_by_columns(data_gen_config.all_columns()); AggregationFuzzerBuilder::from(data_gen_config) .add_query_builder(query_builder) @@ -138,7 +134,7 @@ async fn test_count() { .with_distinct_aggregate_function("count") // count work for all arguments .with_aggregate_arguments(data_gen_config.all_columns()) - .with_group_by_columns(data_gen_config.all_columns()); + .set_group_by_columns(data_gen_config.all_columns()); AggregationFuzzerBuilder::from(data_gen_config) .add_query_builder(query_builder) @@ -174,15 +170,21 @@ fn baseline_config() -> DatasetGeneratorConfig { // TODO add support for utf8view in data generator // ColumnDescr::new("utf8view", DataType::Utf8View), // todo binary + // low cardinality columns + ColumnDescr::new("u8_low", DataType::UInt8).with_max_num_distinct(10), + ColumnDescr::new("utf8_low", DataType::Utf8).with_max_num_distinct(10), 
]; + let min_num_rows = 512; + let max_num_rows = 1024; + DatasetGeneratorConfig { columns, - rows_num_range: (512, 1024), + rows_num_range: (min_num_rows, max_num_rows), sort_keys_set: vec![ // low cardinality to try and get many repeated runs - vec![String::from("u8")], - vec![String::from("utf8"), String::from("u8")], + vec![String::from("u8_low")], + vec![String::from("utf8_low"), String::from("u8_low")], ], } } @@ -232,7 +234,7 @@ async fn run_aggregate_test(input1: Vec, group_by_columns: Vec<&str let schema = input1[0].schema(); let session_config = SessionConfig::new().with_batch_size(50); let ctx = SessionContext::new_with_config(session_config); - let mut sort_keys = vec![]; + let mut sort_keys = LexOrdering::default(); for ordering_col in ["a", "b", "c"] { sort_keys.push(PhysicalSortExpr { expr: col(ordering_col, &schema).unwrap(), diff --git a/datafusion/core/tests/fuzz_cases/aggregation_fuzzer/data_generator.rs b/datafusion/core/tests/fuzz_cases/aggregation_fuzzer/data_generator.rs index ef9b5a7f355a..aafa5ed7f66b 100644 --- a/datafusion/core/tests/fuzz_cases/aggregation_fuzzer/data_generator.rs +++ b/datafusion/core/tests/fuzz_cases/aggregation_fuzzer/data_generator.rs @@ -25,6 +25,7 @@ use arrow_array::{ArrayRef, RecordBatch}; use arrow_schema::{DataType, Field, Schema}; use datafusion_common::{arrow_datafusion_err, DataFusionError, Result}; use datafusion_physical_expr::{expressions::col, PhysicalSortExpr}; +use datafusion_physical_expr_common::sort_expr::LexOrdering; use datafusion_physical_plan::sorts::sort::sort_batch; use rand::{ rngs::{StdRng, ThreadRng}, @@ -140,8 +141,8 @@ impl DatasetGenerator { let col_expr = col(key, schema)?; Ok(PhysicalSortExpr::new_default(col_expr)) }) - .collect::>>()?; - let sorted_batch = sort_batch(&base_batch, &sort_exprs, None)?; + .collect::>()?; + let sorted_batch = sort_batch(&base_batch, sort_exprs.as_ref(), None)?; let batches = stagger_batch(sorted_batch); let dataset = Dataset::new(batches, sort_keys); @@ 
-174,11 +175,16 @@ impl Dataset { #[derive(Debug, Clone)] pub struct ColumnDescr { - // Column name + /// Column name name: String, - // Data type of this column + /// Data type of this column column_type: DataType, + + /// The maximum number of distinct values in this column. + /// + /// See [`ColumnDescr::with_max_num_distinct`] for more information + max_num_distinct: Option, } impl ColumnDescr { @@ -187,8 +193,18 @@ impl ColumnDescr { Self { name: name.to_string(), column_type, + max_num_distinct: None, } } + + /// set the maximum number of distinct values in this column + /// + /// If `None`, the number of distinct values is randomly selected between 1 + /// and the number of rows. + pub fn with_max_num_distinct(mut self, num_distinct: usize) -> Self { + self.max_num_distinct = Some(num_distinct); + self + } } /// Record batch generator @@ -203,20 +219,15 @@ struct RecordBatchGenerator { } macro_rules! generate_string_array { - ($SELF:ident, $NUM_ROWS:ident, $BATCH_GEN_RNG:ident, $ARRAY_GEN_RNG:ident, $OFFSET_TYPE:ty) => {{ + ($SELF:ident, $NUM_ROWS:ident, $MAX_NUM_DISTINCT:expr, $BATCH_GEN_RNG:ident, $ARRAY_GEN_RNG:ident, $OFFSET_TYPE:ty) => {{ let null_pct_idx = $BATCH_GEN_RNG.gen_range(0..$SELF.candidate_null_pcts.len()); let null_pct = $SELF.candidate_null_pcts[null_pct_idx]; let max_len = $BATCH_GEN_RNG.gen_range(1..50); - let num_distinct_strings = if $NUM_ROWS > 1 { - $BATCH_GEN_RNG.gen_range(1..$NUM_ROWS) - } else { - $NUM_ROWS - }; let mut generator = StringArrayGenerator { max_len, num_strings: $NUM_ROWS, - num_distinct_strings, + num_distinct_strings: $MAX_NUM_DISTINCT, null_pct, rng: $ARRAY_GEN_RNG, }; @@ -226,19 +237,14 @@ macro_rules! generate_string_array { } macro_rules! generate_primitive_array { - ($SELF:ident, $NUM_ROWS:ident, $BATCH_GEN_RNG:ident, $ARRAY_GEN_RNG:ident, $ARROW_TYPE:ident) => { + ($SELF:ident, $NUM_ROWS:ident, $MAX_NUM_DISTINCT:expr, $BATCH_GEN_RNG:ident, $ARRAY_GEN_RNG:ident, $ARROW_TYPE:ident) => { paste::paste! 
{{ let null_pct_idx = $BATCH_GEN_RNG.gen_range(0..$SELF.candidate_null_pcts.len()); let null_pct = $SELF.candidate_null_pcts[null_pct_idx]; - let num_distinct_primitives = if $NUM_ROWS > 1 { - $BATCH_GEN_RNG.gen_range(1..$NUM_ROWS) - } else { - $NUM_ROWS - }; let mut generator = PrimitiveArrayGenerator { num_primitives: $NUM_ROWS, - num_distinct_primitives, + num_distinct_primitives: $MAX_NUM_DISTINCT, null_pct, rng: $ARRAY_GEN_RNG, }; @@ -268,7 +274,7 @@ impl RecordBatchGenerator { let mut arrays = Vec::with_capacity(self.columns.len()); for col in self.columns.iter() { let array = self.generate_array_of_type( - col.column_type.clone(), + col, num_rows, &mut rng, array_gen_rng.clone(), @@ -289,16 +295,28 @@ impl RecordBatchGenerator { fn generate_array_of_type( &self, - data_type: DataType, + col: &ColumnDescr, num_rows: usize, batch_gen_rng: &mut ThreadRng, array_gen_rng: StdRng, ) -> ArrayRef { - match data_type { + let num_distinct = if num_rows > 1 { + batch_gen_rng.gen_range(1..num_rows) + } else { + num_rows + }; + // cap to at most the num_distinct values + let max_num_distinct = col + .max_num_distinct + .map(|max| num_distinct.min(max)) + .unwrap_or(num_distinct); + + match col.column_type { DataType::Int8 => { generate_primitive_array!( self, num_rows, + max_num_distinct, batch_gen_rng, array_gen_rng, Int8Type @@ -308,6 +326,7 @@ impl RecordBatchGenerator { generate_primitive_array!( self, num_rows, + max_num_distinct, batch_gen_rng, array_gen_rng, Int16Type @@ -317,6 +336,7 @@ impl RecordBatchGenerator { generate_primitive_array!( self, num_rows, + max_num_distinct, batch_gen_rng, array_gen_rng, Int32Type @@ -326,6 +346,7 @@ impl RecordBatchGenerator { generate_primitive_array!( self, num_rows, + max_num_distinct, batch_gen_rng, array_gen_rng, Int64Type @@ -335,6 +356,7 @@ impl RecordBatchGenerator { generate_primitive_array!( self, num_rows, + max_num_distinct, batch_gen_rng, array_gen_rng, UInt8Type @@ -344,6 +366,7 @@ impl RecordBatchGenerator { 
generate_primitive_array!( self, num_rows, + max_num_distinct, batch_gen_rng, array_gen_rng, UInt16Type @@ -353,6 +376,7 @@ impl RecordBatchGenerator { generate_primitive_array!( self, num_rows, + max_num_distinct, batch_gen_rng, array_gen_rng, UInt32Type @@ -362,6 +386,7 @@ impl RecordBatchGenerator { generate_primitive_array!( self, num_rows, + max_num_distinct, batch_gen_rng, array_gen_rng, UInt64Type @@ -371,6 +396,7 @@ impl RecordBatchGenerator { generate_primitive_array!( self, num_rows, + max_num_distinct, batch_gen_rng, array_gen_rng, Float32Type @@ -380,6 +406,7 @@ impl RecordBatchGenerator { generate_primitive_array!( self, num_rows, + max_num_distinct, batch_gen_rng, array_gen_rng, Float64Type @@ -389,6 +416,7 @@ impl RecordBatchGenerator { generate_primitive_array!( self, num_rows, + max_num_distinct, batch_gen_rng, array_gen_rng, Date32Type @@ -398,19 +426,34 @@ impl RecordBatchGenerator { generate_primitive_array!( self, num_rows, + max_num_distinct, batch_gen_rng, array_gen_rng, Date64Type ) } DataType::Utf8 => { - generate_string_array!(self, num_rows, batch_gen_rng, array_gen_rng, i32) + generate_string_array!( + self, + num_rows, + max_num_distinct, + batch_gen_rng, + array_gen_rng, + i32 + ) } DataType::LargeUtf8 => { - generate_string_array!(self, num_rows, batch_gen_rng, array_gen_rng, i64) + generate_string_array!( + self, + num_rows, + max_num_distinct, + batch_gen_rng, + array_gen_rng, + i64 + ) } _ => { - panic!("Unsupported data generator type: {data_type}") + panic!("Unsupported data generator type: {}", col.column_type) } } } @@ -435,14 +478,8 @@ mod test { // - Their rows num should be same and between [16, 32] let config = DatasetGeneratorConfig { columns: vec![ - ColumnDescr { - name: "a".to_string(), - column_type: DataType::Utf8, - }, - ColumnDescr { - name: "b".to_string(), - column_type: DataType::UInt32, - }, + ColumnDescr::new("a", DataType::Utf8), + ColumnDescr::new("b", DataType::UInt32), ], rows_num_range: (16, 32), 
sort_keys_set: vec![vec!["b".to_string()]], diff --git a/datafusion/core/tests/fuzz_cases/aggregation_fuzzer/fuzzer.rs b/datafusion/core/tests/fuzz_cases/aggregation_fuzzer/fuzzer.rs index 0704bafa0318..d021e73f35b2 100644 --- a/datafusion/core/tests/fuzz_cases/aggregation_fuzzer/fuzzer.rs +++ b/datafusion/core/tests/fuzz_cases/aggregation_fuzzer/fuzzer.rs @@ -63,17 +63,35 @@ impl AggregationFuzzerBuilder { } /// Adds random SQL queries to the fuzzer along with the table name - pub fn add_query_builder(mut self, query_builder: QueryBuilder) -> Self { - const NUM_QUERIES: usize = 10; + /// + /// Adds + /// - 3 random queries + /// - 3 random queries for each group by selected from the sort keys + /// - 1 random query with no grouping + pub fn add_query_builder(mut self, mut query_builder: QueryBuilder) -> Self { + const NUM_QUERIES: usize = 3; for _ in 0..NUM_QUERIES { - self = self.add_sql(&query_builder.generate_query()); + let sql = query_builder.generate_query(); + self.candidate_sqls.push(Arc::from(sql)); } - self.table_name(query_builder.table_name()) - } - - fn add_sql(mut self, sql: &str) -> Self { + // also add several queries limited to grouping on the group by columns only, if any + // So if the data is sorted on `a,b` only group by `a,b` or`a` or `b` + if let Some(data_gen_config) = &self.data_gen_config { + for sort_keys in &data_gen_config.sort_keys_set { + let group_by_columns = sort_keys.iter().map(|s| s.as_str()); + query_builder = query_builder.set_group_by_columns(group_by_columns); + for _ in 0..NUM_QUERIES { + let sql = query_builder.generate_query(); + self.candidate_sqls.push(Arc::from(sql)); + } + } + } + // also add a query with no grouping + query_builder = query_builder.set_group_by_columns(vec![]); + let sql = query_builder.generate_query(); self.candidate_sqls.push(Arc::from(sql)); - self + + self.table_name(query_builder.table_name()) } pub fn table_name(mut self, table_name: &str) -> Self { @@ -359,7 +377,7 @@ fn 
format_batches_with_limit(batches: &[RecordBatch]) -> impl std::fmt::Display /// ```sql /// SELECT AGG(..) FROM table_name GROUP BY ///``` -#[derive(Debug, Default)] +#[derive(Debug, Default, Clone)] pub struct QueryBuilder { /// The name of the table to query table_name: String, @@ -412,17 +430,16 @@ impl QueryBuilder { self } - /// Add a column to be used in the group bys - pub fn with_group_by_columns<'a>( + /// Set the columns to be used in the group bys clauses + pub fn set_group_by_columns<'a>( mut self, group_by: impl IntoIterator, ) -> Self { - let group_by = group_by.into_iter().map(String::from); - self.group_by_columns.extend(group_by); + self.group_by_columns = group_by.into_iter().map(String::from).collect(); self } - /// Add a column to be used as an argument in the aggregate functions + /// Add one or more columns to be used as an argument in the aggregate functions pub fn with_aggregate_arguments<'a>( mut self, arguments: impl IntoIterator, @@ -497,7 +514,9 @@ impl QueryBuilder { let mut already_used = HashSet::new(); let mut group_by = vec![]; - while group_by.len() < num_group_by { + while group_by.len() < num_group_by + && already_used.len() != self.group_by_columns.len() + { let idx = rng.gen_range(0..self.group_by_columns.len()); if already_used.insert(idx) { group_by.push(self.group_by_columns[idx].clone()); diff --git a/datafusion/core/tests/fuzz_cases/equivalence/ordering.rs b/datafusion/core/tests/fuzz_cases/equivalence/ordering.rs index 94157e11702c..525baadd14a5 100644 --- a/datafusion/core/tests/fuzz_cases/equivalence/ordering.rs +++ b/datafusion/core/tests/fuzz_cases/equivalence/ordering.rs @@ -25,7 +25,7 @@ use datafusion_common::{DFSchema, Result}; use datafusion_expr::{Operator, ScalarUDF}; use datafusion_physical_expr::expressions::{col, BinaryExpr}; use datafusion_physical_expr_common::physical_expr::PhysicalExpr; -use datafusion_physical_expr_common::sort_expr::PhysicalSortExpr; +use 
datafusion_physical_expr_common::sort_expr::{LexOrdering, PhysicalSortExpr}; use itertools::Itertools; use std::sync::Arc; @@ -62,7 +62,7 @@ fn test_ordering_satisfy_with_equivalence_random() -> Result<()> { expr: Arc::clone(expr), options: SORT_OPTIONS, }) - .collect::>(); + .collect::(); let expected = is_table_same_after_sort( requirement.clone(), table_data_with_properties.clone(), @@ -74,7 +74,7 @@ fn test_ordering_satisfy_with_equivalence_random() -> Result<()> { // Check whether ordering_satisfy API result and // experimental result matches. assert_eq!( - eq_properties.ordering_satisfy(&requirement), + eq_properties.ordering_satisfy(requirement.as_ref()), expected, "{}", err_msg @@ -135,7 +135,7 @@ fn test_ordering_satisfy_with_equivalence_complex_random() -> Result<()> { expr: Arc::clone(expr), options: SORT_OPTIONS, }) - .collect::>(); + .collect::(); let expected = is_table_same_after_sort( requirement.clone(), table_data_with_properties.clone(), @@ -148,7 +148,7 @@ fn test_ordering_satisfy_with_equivalence_complex_random() -> Result<()> { // experimental result matches. assert_eq!( - eq_properties.ordering_satisfy(&requirement), + eq_properties.ordering_satisfy(requirement.as_ref()), (expected | false), "{}", err_msg @@ -311,7 +311,7 @@ fn test_ordering_satisfy_with_equivalence() -> Result<()> { expr: Arc::clone(expr), options, }) - .collect::>(); + .collect::(); // Check expected result with experimental result. 
assert_eq!( @@ -322,7 +322,7 @@ fn test_ordering_satisfy_with_equivalence() -> Result<()> { expected ); assert_eq!( - eq_properties.ordering_satisfy(&required), + eq_properties.ordering_satisfy(required.as_ref()), expected, "{err_msg}" ); diff --git a/datafusion/core/tests/fuzz_cases/equivalence/projection.rs b/datafusion/core/tests/fuzz_cases/equivalence/projection.rs index c0c8517a612b..3df3e0348e42 100644 --- a/datafusion/core/tests/fuzz_cases/equivalence/projection.rs +++ b/datafusion/core/tests/fuzz_cases/equivalence/projection.rs @@ -25,7 +25,7 @@ use datafusion_expr::{Operator, ScalarUDF}; use datafusion_physical_expr::equivalence::ProjectionMapping; use datafusion_physical_expr::expressions::{col, BinaryExpr}; use datafusion_physical_expr_common::physical_expr::PhysicalExpr; -use datafusion_physical_expr_common::sort_expr::PhysicalSortExpr; +use datafusion_physical_expr_common::sort_expr::{LexOrdering, PhysicalSortExpr}; use itertools::Itertools; use std::sync::Arc; @@ -173,7 +173,7 @@ fn ordering_satisfy_after_projection_random() -> Result<()> { expr: Arc::clone(expr), options: SORT_OPTIONS, }) - .collect::>(); + .collect::(); let expected = is_table_same_after_sort( requirement.clone(), projected_batch.clone(), @@ -185,7 +185,7 @@ fn ordering_satisfy_after_projection_random() -> Result<()> { // Check whether ordering_satisfy API result and // experimental result matches. 
assert_eq!( - projected_eq.ordering_satisfy(&requirement), + projected_eq.ordering_satisfy(requirement.as_ref()), expected, "{}", err_msg diff --git a/datafusion/core/tests/fuzz_cases/equivalence/properties.rs b/datafusion/core/tests/fuzz_cases/equivalence/properties.rs index e704fcacc328..82586bd79eda 100644 --- a/datafusion/core/tests/fuzz_cases/equivalence/properties.rs +++ b/datafusion/core/tests/fuzz_cases/equivalence/properties.rs @@ -23,7 +23,7 @@ use datafusion_common::{DFSchema, Result}; use datafusion_expr::{Operator, ScalarUDF}; use datafusion_physical_expr::expressions::{col, BinaryExpr}; use datafusion_physical_expr_common::physical_expr::PhysicalExpr; -use datafusion_physical_expr_common::sort_expr::PhysicalSortExpr; +use datafusion_physical_expr_common::sort_expr::{LexOrdering, PhysicalSortExpr}; use itertools::Itertools; use std::sync::Arc; @@ -76,7 +76,7 @@ fn test_find_longest_permutation_random() -> Result<()> { expr: Arc::clone(&exprs[idx]), options: sort_expr.options, }) - .collect::>(); + .collect::(); assert_eq!( ordering, ordering2, "indices and lexicographical ordering do not match" diff --git a/datafusion/core/tests/fuzz_cases/equivalence/utils.rs b/datafusion/core/tests/fuzz_cases/equivalence/utils.rs index acc45fe0e591..35da8b596380 100644 --- a/datafusion/core/tests/fuzz_cases/equivalence/utils.rs +++ b/datafusion/core/tests/fuzz_cases/equivalence/utils.rs @@ -223,7 +223,7 @@ fn add_equal_conditions_test() -> Result<()> { /// If the table remains the same after sorting with the added unique column, it indicates that the table was /// already sorted according to `required_ordering` to begin with. 
pub fn is_table_same_after_sort( - mut required_ordering: Vec, + mut required_ordering: LexOrdering, batch: RecordBatch, ) -> Result { // Clone the original schema and columns @@ -444,7 +444,7 @@ pub fn generate_table_for_orderings( assert!(!orderings.is_empty()); // Sort the inner vectors by their lengths (longest first) - orderings.sort_by_key(|v| std::cmp::Reverse(v.len())); + orderings.sort_by_key(|v| std::cmp::Reverse(v.inner.len())); let arrays = schema .fields @@ -459,13 +459,13 @@ pub fn generate_table_for_orderings( let batch = RecordBatch::try_from_iter(arrays)?; // Sort batch according to first ordering expression - let sort_columns = get_sort_columns(&batch, &orderings[0])?; + let sort_columns = get_sort_columns(&batch, orderings[0].as_ref())?; let sort_indices = lexsort_to_indices(&sort_columns, None)?; let mut batch = take_record_batch(&batch, &sort_indices)?; // prune out rows that is invalid according to remaining orderings. for ordering in orderings.iter().skip(1) { - let sort_columns = get_sort_columns(&batch, ordering)?; + let sort_columns = get_sort_columns(&batch, ordering.as_ref())?; // Collect sort options and values into separate vectors. 
let (sort_options, sort_col_values): (Vec<_>, Vec<_>) = sort_columns @@ -495,7 +495,7 @@ pub fn generate_table_for_orderings( // Convert each tuple to PhysicalSortExpr pub fn convert_to_sort_exprs( in_data: &[(&Arc, SortOptions)], -) -> Vec { +) -> LexOrdering { in_data .iter() .map(|(expr, options)| PhysicalSortExpr { @@ -508,7 +508,7 @@ pub fn convert_to_sort_exprs( // Convert each inner tuple to PhysicalSortExpr pub fn convert_to_orderings( orderings: &[Vec<(&Arc, SortOptions)>], -) -> Vec> { +) -> Vec { orderings .iter() .map(|sort_exprs| convert_to_sort_exprs(sort_exprs)) diff --git a/datafusion/core/tests/fuzz_cases/join_fuzz.rs b/datafusion/core/tests/fuzz_cases/join_fuzz.rs index c8478db22bd4..d7a3460e4987 100644 --- a/datafusion/core/tests/fuzz_cases/join_fuzz.rs +++ b/datafusion/core/tests/fuzz_cases/join_fuzz.rs @@ -41,7 +41,6 @@ use datafusion::physical_plan::joins::{ }; use datafusion::physical_plan::memory::MemoryExec; -use crate::fuzz_cases::join_fuzz::JoinTestType::NljHj; use datafusion::prelude::{SessionConfig, SessionContext}; use test_utils::stagger_batch_with_seed; @@ -90,7 +89,6 @@ fn col_lt_col_filter(schema1: Arc, schema2: Arc) -> JoinFilter { } #[tokio::test] -#[allow(unused_qualifications)] async fn test_inner_join_1k_filtered() { JoinFuzzTestCase::new( make_staggered_batches(1000), @@ -103,7 +101,6 @@ async fn test_inner_join_1k_filtered() { } #[tokio::test] -#[allow(unused_qualifications)] async fn test_inner_join_1k() { JoinFuzzTestCase::new( make_staggered_batches(1000), @@ -116,7 +113,6 @@ async fn test_inner_join_1k() { } #[tokio::test] -#[allow(unused_qualifications)] async fn test_left_join_1k() { JoinFuzzTestCase::new( make_staggered_batches(1000), @@ -129,7 +125,6 @@ async fn test_left_join_1k() { } #[tokio::test] -#[allow(unused_qualifications)] async fn test_left_join_1k_filtered() { JoinFuzzTestCase::new( make_staggered_batches(1000), @@ -142,7 +137,6 @@ async fn test_left_join_1k_filtered() { } #[tokio::test] 
-#[allow(unused_qualifications)] async fn test_right_join_1k() { JoinFuzzTestCase::new( make_staggered_batches(1000), @@ -155,7 +149,6 @@ async fn test_right_join_1k() { } #[tokio::test] -#[allow(unused_qualifications)] async fn test_right_join_1k_filtered() { JoinFuzzTestCase::new( make_staggered_batches(1000), @@ -168,7 +161,6 @@ async fn test_right_join_1k_filtered() { } #[tokio::test] -#[allow(unused_qualifications)] async fn test_full_join_1k() { JoinFuzzTestCase::new( make_staggered_batches(1000), @@ -181,7 +173,6 @@ async fn test_full_join_1k() { } #[tokio::test] -#[allow(unused_qualifications)] // flaky for HjSmj case // https://github.com/apache/datafusion/issues/12359 async fn test_full_join_1k_filtered() { @@ -196,7 +187,6 @@ async fn test_full_join_1k_filtered() { } #[tokio::test] -#[allow(unused_qualifications)] async fn test_semi_join_1k() { JoinFuzzTestCase::new( make_staggered_batches(1000), @@ -209,7 +199,6 @@ async fn test_semi_join_1k() { } #[tokio::test] -#[allow(unused_qualifications)] async fn test_semi_join_1k_filtered() { JoinFuzzTestCase::new( make_staggered_batches(1000), @@ -222,7 +211,6 @@ async fn test_semi_join_1k_filtered() { } #[tokio::test] -#[allow(unused_qualifications)] async fn test_anti_join_1k() { JoinFuzzTestCase::new( make_staggered_batches(1000), @@ -235,7 +223,6 @@ async fn test_anti_join_1k() { } #[tokio::test] -#[allow(unused_qualifications)] async fn test_anti_join_1k_filtered() { JoinFuzzTestCase::new( make_staggered_batches(1000), @@ -243,7 +230,31 @@ async fn test_anti_join_1k_filtered() { JoinType::LeftAnti, Some(Box::new(col_lt_col_filter)), ) - .run_test(&[JoinTestType::HjSmj, NljHj], false) + .run_test(&[JoinTestType::HjSmj, JoinTestType::NljHj], false) + .await +} + +#[tokio::test] +async fn test_left_mark_join_1k() { + JoinFuzzTestCase::new( + make_staggered_batches(1000), + make_staggered_batches(1000), + JoinType::LeftMark, + None, + ) + .run_test(&[JoinTestType::HjSmj, JoinTestType::NljHj], false) + .await 
+} + +#[tokio::test] +async fn test_left_mark_join_1k_filtered() { + JoinFuzzTestCase::new( + make_staggered_batches(1000), + make_staggered_batches(1000), + JoinType::LeftMark, + Some(Box::new(col_lt_col_filter)), + ) + .run_test(&[JoinTestType::HjSmj, JoinTestType::NljHj], false) .await } @@ -461,7 +472,6 @@ impl JoinFuzzTestCase { /// `join_tests` - identifies what join types to test /// if `debug` flag is set the test will save randomly generated inputs and outputs to user folders, /// so it is easy to debug a test on top of the failed data - #[allow(unused_qualifications)] async fn run_test(&self, join_tests: &[JoinTestType], debug: bool) { for batch_size in self.batch_sizes { let session_config = SessionConfig::new().with_batch_size(*batch_size); diff --git a/datafusion/core/tests/fuzz_cases/merge_fuzz.rs b/datafusion/core/tests/fuzz_cases/merge_fuzz.rs index 4eb1070e6c85..4e895920dd3d 100644 --- a/datafusion/core/tests/fuzz_cases/merge_fuzz.rs +++ b/datafusion/core/tests/fuzz_cases/merge_fuzz.rs @@ -31,6 +31,7 @@ use datafusion::physical_plan::{ sorts::sort_preserving_merge::SortPreservingMergeExec, }; use datafusion::prelude::{SessionConfig, SessionContext}; +use datafusion_physical_expr_common::sort_expr::LexOrdering; use test_utils::{batches_to_vec, partitions_to_sorted_vec, stagger_batch_with_seed}; #[tokio::test] @@ -107,13 +108,13 @@ async fn run_merge_test(input: Vec>) { .expect("at least one batch"); let schema = first_batch.schema(); - let sort = vec![PhysicalSortExpr { + let sort = LexOrdering::new(vec![PhysicalSortExpr { expr: col("x", &schema).unwrap(), options: SortOptions { descending: false, nulls_first: true, }, - }]; + }]); let exec = MemoryExec::try_new(&input, schema, None).unwrap(); let merge = Arc::new(SortPreservingMergeExec::new(sort, Arc::new(exec))); diff --git a/datafusion/core/tests/fuzz_cases/sort_fuzz.rs b/datafusion/core/tests/fuzz_cases/sort_fuzz.rs index 4ba06ef1d2a6..e4acb96f4930 100644 --- 
a/datafusion/core/tests/fuzz_cases/sort_fuzz.rs +++ b/datafusion/core/tests/fuzz_cases/sort_fuzz.rs @@ -30,6 +30,7 @@ use datafusion::physical_plan::{collect, ExecutionPlan}; use datafusion::prelude::{SessionConfig, SessionContext}; use datafusion_execution::memory_pool::GreedyMemoryPool; use datafusion_physical_expr::expressions::col; +use datafusion_physical_expr_common::sort_expr::LexOrdering; use rand::Rng; use std::sync::Arc; use test_utils::{batches_to_vec, partitions_to_sorted_vec}; @@ -114,13 +115,13 @@ impl SortTest { .expect("at least one batch"); let schema = first_batch.schema(); - let sort = vec![PhysicalSortExpr { + let sort = LexOrdering::new(vec![PhysicalSortExpr { expr: col("x", &schema).unwrap(), options: SortOptions { descending: false, nulls_first: true, }, - }]; + }]); let exec = MemoryExec::try_new(&input, schema, None).unwrap(); let sort = Arc::new(SortExec::new(sort, Arc::new(exec))); diff --git a/datafusion/core/tests/fuzz_cases/sort_preserving_repartition_fuzz.rs b/datafusion/core/tests/fuzz_cases/sort_preserving_repartition_fuzz.rs index 353db8668363..73f4a569954e 100644 --- a/datafusion/core/tests/fuzz_cases/sort_preserving_repartition_fuzz.rs +++ b/datafusion/core/tests/fuzz_cases/sort_preserving_repartition_fuzz.rs @@ -45,6 +45,7 @@ mod sp_repartition_fuzz_tests { }; use test_utils::add_empty_batches; + use datafusion_physical_expr_common::sort_expr::LexOrdering; use itertools::izip; use rand::{rngs::StdRng, seq::SliceRandom, Rng, SeedableRng}; @@ -345,7 +346,7 @@ mod sp_repartition_fuzz_tests { let schema = input1[0].schema(); let session_config = SessionConfig::new().with_batch_size(50); let ctx = SessionContext::new_with_config(session_config); - let mut sort_keys = vec![]; + let mut sort_keys = LexOrdering::default(); for ordering_col in ["a", "b", "c"] { sort_keys.push(PhysicalSortExpr { expr: col(ordering_col, &schema).unwrap(), diff --git a/datafusion/core/tests/fuzz_cases/window_fuzz.rs 
b/datafusion/core/tests/fuzz_cases/window_fuzz.rs index 61b4e32ad6c9..5bfb4d97ed70 100644 --- a/datafusion/core/tests/fuzz_cases/window_fuzz.rs +++ b/datafusion/core/tests/fuzz_cases/window_fuzz.rs @@ -47,6 +47,7 @@ use test_utils::add_empty_batches; use datafusion::functions_window::row_number::row_number_udwf; use datafusion_functions_window::lead_lag::{lag_udwf, lead_udwf}; use datafusion_functions_window::rank::{dense_rank_udwf, rank_udwf}; +use datafusion_physical_expr_common::sort_expr::LexOrdering; use hashbrown::HashMap; use rand::distributions::Alphanumeric; use rand::rngs::StdRng; @@ -251,7 +252,7 @@ async fn bounded_window_causal_non_causal() -> Result<()> { ]; let partitionby_exprs = vec![]; - let orderby_exprs = vec![]; + let orderby_exprs = LexOrdering::default(); // Window frame starts with "UNBOUNDED PRECEDING": let start_bound = WindowFrameBound::Preceding(ScalarValue::UInt64(None)); @@ -284,7 +285,7 @@ async fn bounded_window_causal_non_causal() -> Result<()> { fn_name.to_string(), &args, &partitionby_exprs, - &orderby_exprs, + orderby_exprs.as_ref(), Arc::new(window_frame), &extended_schema, false, @@ -599,7 +600,7 @@ async fn run_window_test( let ctx = SessionContext::new_with_config(session_config); let (window_fn, args, fn_name) = get_random_function(&schema, &mut rng, is_linear); let window_frame = get_random_window_frame(&mut rng, is_linear); - let mut orderby_exprs = vec![]; + let mut orderby_exprs = LexOrdering::default(); for column in &orderby_columns { orderby_exprs.push(PhysicalSortExpr { expr: col(column, &schema)?, @@ -607,27 +608,27 @@ async fn run_window_test( }) } if orderby_exprs.len() > 1 && !window_frame.can_accept_multi_orderby() { - orderby_exprs = orderby_exprs[0..1].to_vec(); + orderby_exprs = LexOrdering::new(orderby_exprs[0..1].to_vec()); } let mut partitionby_exprs = vec![]; for column in &partition_by_columns { partitionby_exprs.push(col(column, &schema)?); } - let mut sort_keys = vec![]; + let mut sort_keys = 
LexOrdering::default(); for partition_by_expr in &partitionby_exprs { sort_keys.push(PhysicalSortExpr { expr: partition_by_expr.clone(), options: SortOptions::default(), }) } - for order_by_expr in &orderby_exprs { + for order_by_expr in &orderby_exprs.inner { if !sort_keys.contains(order_by_expr) { sort_keys.push(order_by_expr.clone()) } } let concat_input_record = concat_batches(&schema, &input1)?; - let source_sort_keys = vec![ + let source_sort_keys = LexOrdering::new(vec![ PhysicalSortExpr { expr: col("a", &schema)?, options: Default::default(), @@ -640,7 +641,7 @@ async fn run_window_test( expr: col("c", &schema)?, options: Default::default(), }, - ]; + ]); let mut exec1 = Arc::new( MemoryExec::try_new(&[vec![concat_input_record]], schema.clone(), None)? .try_with_sort_information(vec![source_sort_keys.clone()])?, @@ -659,7 +660,7 @@ async fn run_window_test( fn_name.clone(), &args, &partitionby_exprs, - &orderby_exprs, + orderby_exprs.as_ref(), Arc::new(window_frame.clone()), &extended_schema, false, @@ -677,7 +678,7 @@ async fn run_window_test( fn_name, &args, &partitionby_exprs, - &orderby_exprs, + orderby_exprs.as_ref(), Arc::new(window_frame.clone()), &extended_schema, false, diff --git a/datafusion/core/tests/memory_limit/mod.rs b/datafusion/core/tests/memory_limit/mod.rs index fc2fb9afb5f9..6817969580da 100644 --- a/datafusion/core/tests/memory_limit/mod.rs +++ b/datafusion/core/tests/memory_limit/mod.rs @@ -238,15 +238,15 @@ async fn sort_preserving_merge() { // SortPreservingMergeExec (not a Sort which would compete // with the SortPreservingMergeExec for memory) &[ - "+---------------+-----------------------------------------------------------------------------------------------------------+", - "| plan_type | plan |", - "+---------------+-----------------------------------------------------------------------------------------------------------+", - "| logical_plan | Sort: t.a ASC NULLS LAST, t.b ASC NULLS LAST, fetch=10 |", - "| | TableScan: t 
projection=[a, b] |", - "| physical_plan | SortPreservingMergeExec: [a@0 ASC NULLS LAST,b@1 ASC NULLS LAST], fetch=10 |", - "| | MemoryExec: partitions=2, partition_sizes=[5, 5], output_ordering=a@0 ASC NULLS LAST,b@1 ASC NULLS LAST |", - "| | |", - "+---------------+-----------------------------------------------------------------------------------------------------------+", + "+---------------+------------------------------------------------------------------------------------------------------------+", + "| plan_type | plan |", + "+---------------+------------------------------------------------------------------------------------------------------------+", + "| logical_plan | Sort: t.a ASC NULLS LAST, t.b ASC NULLS LAST, fetch=10 |", + "| | TableScan: t projection=[a, b] |", + "| physical_plan | SortPreservingMergeExec: [a@0 ASC NULLS LAST, b@1 ASC NULLS LAST], fetch=10 |", + "| | MemoryExec: partitions=2, partition_sizes=[5, 5], output_ordering=a@0 ASC NULLS LAST, b@1 ASC NULLS LAST |", + "| | |", + "+---------------+------------------------------------------------------------------------------------------------------------+", ] ) .run() @@ -281,15 +281,15 @@ async fn sort_spill_reservation() { // also merge, so we can ensure the sort could finish // given enough merging memory &[ - "+---------------+--------------------------------------------------------------------------------------------------------+", - "| plan_type | plan |", - "+---------------+--------------------------------------------------------------------------------------------------------+", - "| logical_plan | Sort: t.a ASC NULLS LAST, t.b DESC NULLS FIRST |", - "| | TableScan: t projection=[a, b] |", - "| physical_plan | SortExec: expr=[a@0 ASC NULLS LAST,b@1 DESC], preserve_partitioning=[false] |", - "| | MemoryExec: partitions=1, partition_sizes=[5], output_ordering=a@0 ASC NULLS LAST,b@1 ASC NULLS LAST |", - "| | |", - 
"+---------------+--------------------------------------------------------------------------------------------------------+", + "+---------------+---------------------------------------------------------------------------------------------------------+", + "| plan_type | plan |", + "+---------------+---------------------------------------------------------------------------------------------------------+", + "| logical_plan | Sort: t.a ASC NULLS LAST, t.b DESC NULLS FIRST |", + "| | TableScan: t projection=[a, b] |", + "| physical_plan | SortExec: expr=[a@0 ASC NULLS LAST, b@1 DESC], preserve_partitioning=[false] |", + "| | MemoryExec: partitions=1, partition_sizes=[5], output_ordering=a@0 ASC NULLS LAST, b@1 ASC NULLS LAST |", + "| | |", + "+---------------+---------------------------------------------------------------------------------------------------------+", ] ); @@ -654,7 +654,7 @@ impl Scenario { descending: false, nulls_first: false, }; - let sort_information = vec![vec![ + let sort_information = vec![LexOrdering::new(vec![ PhysicalSortExpr { expr: col("a", &schema).unwrap(), options, @@ -663,7 +663,7 @@ impl Scenario { expr: col("b", &schema).unwrap(), options, }, - ]]; + ])]; let table = SortedTableProvider::new(batches, sort_information); Arc::new(table) diff --git a/datafusion/core/tests/parquet/page_pruning.rs b/datafusion/core/tests/parquet/page_pruning.rs index 15efd4bcd9dd..d201ed3a841f 100644 --- a/datafusion/core/tests/parquet/page_pruning.rs +++ b/datafusion/core/tests/parquet/page_pruning.rs @@ -149,8 +149,9 @@ async fn page_index_filter_one_col() { let session_ctx = SessionContext::new(); let task_ctx = session_ctx.task_ctx(); - // 5.create filter date_string_col == 1; - let filter = col("date_string_col").eq(lit("01/01/09")); + // 5.create filter date_string_col == "01/01/09"`; + // Note this test doesn't apply type coercion so the literal must match the actual view type + let filter = 
col("date_string_col").eq(lit(ScalarValue::new_utf8view("01/01/09"))); let parquet_exec = get_parquet_exec(&state, filter).await; let mut results = parquet_exec.execute(0, task_ctx.clone()).unwrap(); let batch = results.next().await.unwrap().unwrap(); diff --git a/datafusion/core/tests/physical_optimizer/limited_distinct_aggregation.rs b/datafusion/core/tests/physical_optimizer/limited_distinct_aggregation.rs index 6859e2f1468c..6910db6285a3 100644 --- a/datafusion/core/tests/physical_optimizer/limited_distinct_aggregation.rs +++ b/datafusion/core/tests/physical_optimizer/limited_distinct_aggregation.rs @@ -37,6 +37,7 @@ use datafusion_physical_expr::{ expressions::{cast, col}, PhysicalExpr, PhysicalSortExpr, }; +use datafusion_physical_expr_common::sort_expr::LexOrdering; use datafusion_physical_optimizer::{ limited_distinct_aggregation::LimitedDistinctAggregation, PhysicalOptimizerRule, }; @@ -407,10 +408,10 @@ fn test_has_filter() -> Result<()> { #[test] fn test_has_order_by() -> Result<()> { - let sort_key = vec![PhysicalSortExpr { + let sort_key = LexOrdering::new(vec![PhysicalSortExpr { expr: col("a", &schema()).unwrap(), options: SortOptions::default(), - }]; + }]); let source = parquet_exec_with_sort(vec![sort_key]); let schema = source.schema(); diff --git a/datafusion/core/tests/physical_optimizer/test_util.rs b/datafusion/core/tests/physical_optimizer/test_util.rs index 131b887c4ec7..12cd08fb3db3 100644 --- a/datafusion/core/tests/physical_optimizer/test_util.rs +++ b/datafusion/core/tests/physical_optimizer/test_util.rs @@ -25,11 +25,11 @@ use datafusion::datasource::{ physical_plan::{FileScanConfig, ParquetExec}, }; use datafusion_execution::object_store::ObjectStoreUrl; -use datafusion_physical_expr::PhysicalSortExpr; +use datafusion_physical_expr_common::sort_expr::LexOrdering; /// create a single parquet file that is sorted pub(crate) fn parquet_exec_with_sort( - output_ordering: Vec>, + output_ordering: Vec, ) -> Arc { ParquetExec::builder( 
FileScanConfig::new(ObjectStoreUrl::parse("test:///").unwrap(), schema()) diff --git a/datafusion/core/tests/user_defined/user_defined_window_functions.rs b/datafusion/core/tests/user_defined/user_defined_window_functions.rs index 8fe028eedd44..10ee0c5cd2dc 100644 --- a/datafusion/core/tests/user_defined/user_defined_window_functions.rs +++ b/datafusion/core/tests/user_defined/user_defined_window_functions.rs @@ -29,14 +29,20 @@ use std::{ use arrow::array::AsArray; use arrow_array::{ArrayRef, Int64Array, RecordBatch, StringArray}; -use arrow_schema::{DataType, Field}; +use arrow_schema::{DataType, Field, Schema}; use datafusion::{assert_batches_eq, prelude::SessionContext}; use datafusion_common::{Result, ScalarValue}; use datafusion_expr::{ - PartitionEvaluator, Signature, Volatility, WindowUDF, WindowUDFImpl, + PartitionEvaluator, Signature, TypeSignature, Volatility, WindowUDF, WindowUDFImpl, }; -use datafusion_functions_window_common::field::WindowUDFFieldArgs; use datafusion_functions_window_common::partition::PartitionEvaluatorArgs; +use datafusion_functions_window_common::{ + expr::ExpressionArgs, field::WindowUDFFieldArgs, +}; +use datafusion_physical_expr::{ + expressions::{col, lit}, + PhysicalExpr, +}; /// A query with a window function evaluated over the entire partition const UNBOUNDED_WINDOW_QUERY: &str = "SELECT x, y, val, \ @@ -641,3 +647,120 @@ fn odd_count_arr(arr: &Int64Array, num_rows: usize) -> ArrayRef { let array: Int64Array = std::iter::repeat(odd_count(arr)).take(num_rows).collect(); Arc::new(array) } + +#[derive(Debug)] +struct VariadicWindowUDF { + signature: Signature, +} + +impl VariadicWindowUDF { + fn new() -> Self { + Self { + signature: Signature::one_of( + vec![ + TypeSignature::Any(0), + TypeSignature::Any(1), + TypeSignature::Any(2), + TypeSignature::Any(3), + ], + Volatility::Immutable, + ), + } + } +} + +impl WindowUDFImpl for VariadicWindowUDF { + fn as_any(&self) -> &dyn Any { + self + } + + fn name(&self) -> &str { + 
"variadic_window_udf" + } + + fn signature(&self) -> &Signature { + &self.signature + } + + fn partition_evaluator( + &self, + _: PartitionEvaluatorArgs, + ) -> Result> { + unimplemented!("unnecessary for testing"); + } + + fn field(&self, _: WindowUDFFieldArgs) -> Result { + unimplemented!("unnecessary for testing"); + } +} + +#[test] +// Fixes: default implementation of `WindowUDFImpl::expressions` +// returns all input expressions to the user-defined window +// function unmodified. +// +// See: https://github.com/apache/datafusion/pull/13169 +fn test_default_expressions() -> Result<()> { + let udwf = WindowUDF::from(VariadicWindowUDF::new()); + + let field_a = Field::new("a", DataType::Int32, false); + let field_b = Field::new("b", DataType::Float32, false); + let field_c = Field::new("c", DataType::Boolean, false); + let schema = Schema::new(vec![field_a, field_b, field_c]); + + let test_cases = vec![ + // + // Zero arguments + // + vec![], + // + // Single argument + // + vec![col("a", &schema)?], + vec![lit(1)], + // + // Two arguments + // + vec![col("a", &schema)?, col("b", &schema)?], + vec![col("a", &schema)?, lit(2)], + vec![lit(false), col("a", &schema)?], + // + // Three arguments + // + vec![col("a", &schema)?, col("b", &schema)?, col("c", &schema)?], + vec![col("a", &schema)?, col("b", &schema)?, lit(false)], + vec![col("a", &schema)?, lit(0.5), col("c", &schema)?], + vec![lit(3), col("b", &schema)?, col("c", &schema)?], + ]; + + for input_exprs in &test_cases { + let input_types = input_exprs + .iter() + .map(|expr: &Arc| expr.data_type(&schema).unwrap()) + .collect::>(); + let expr_args = ExpressionArgs::new(input_exprs, &input_types); + + let ret_exprs = udwf.expressions(expr_args); + + // Verify same number of input expressions are returned + assert_eq!( + input_exprs.len(), + ret_exprs.len(), + "\nInput expressions: {:?}\nReturned expressions: {:?}", + input_exprs, + ret_exprs + ); + + // Compares each returned expression with original input 
expressions + for (expected, actual) in input_exprs.iter().zip(&ret_exprs) { + assert_eq!( + format!("{expected:?}"), + format!("{actual:?}"), + "\nInput expressions: {:?}\nReturned expressions: {:?}", + input_exprs, + ret_exprs + ); + } + } + Ok(()) +} diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs index bda4d7ae3d7f..a9c183952fc7 100644 --- a/datafusion/expr/src/expr.rs +++ b/datafusion/expr/src/expr.rs @@ -1574,8 +1574,13 @@ impl Expr { /// Returns true if the expression is volatile, i.e. whether it can return different /// results when evaluated multiple times with the same input. - pub fn is_volatile(&self) -> Result { - self.exists(|expr| Ok(expr.is_volatile_node())) + /// + /// For example the function call `RANDOM()` is volatile as each call will + /// return a different value. + /// + /// See [`Volatility`] for more information. + pub fn is_volatile(&self) -> bool { + self.exists(|expr| Ok(expr.is_volatile_node())).unwrap() } /// Recursively find all [`Expr::Placeholder`] expressions, and diff --git a/datafusion/expr/src/expr_rewriter/mod.rs b/datafusion/expr/src/expr_rewriter/mod.rs index d6d5c3e2931c..c86696854ca3 100644 --- a/datafusion/expr/src/expr_rewriter/mod.rs +++ b/datafusion/expr/src/expr_rewriter/mod.rs @@ -314,6 +314,7 @@ impl NamePreserver { | LogicalPlan::Join(_) | LogicalPlan::TableScan(_) | LogicalPlan::Limit(_) + | LogicalPlan::Execute(_) ), } } diff --git a/datafusion/expr/src/logical_plan/builder.rs b/datafusion/expr/src/logical_plan/builder.rs index 2547aa23d3cd..b7839c4873af 100644 --- a/datafusion/expr/src/logical_plan/builder.rs +++ b/datafusion/expr/src/logical_plan/builder.rs @@ -20,6 +20,7 @@ use std::any::Any; use std::cmp::Ordering; use std::collections::{HashMap, HashSet}; +use std::iter::once; use std::sync::Arc; use crate::dml::CopyTo; @@ -57,6 +58,7 @@ use datafusion_common::{ UnnestOptions, }; use datafusion_expr_common::type_coercion::binary::type_union_resolution; +use indexmap::IndexSet; /// 
Default table name for unnamed table pub const UNNAMED_TABLE: &str = "?table?"; @@ -567,7 +569,7 @@ impl LogicalPlanBuilder { /// See for more details fn add_missing_columns( curr_plan: LogicalPlan, - missing_cols: &[Column], + missing_cols: &IndexSet, is_distinct: bool, ) -> Result { match curr_plan { @@ -612,7 +614,7 @@ impl LogicalPlanBuilder { fn ambiguous_distinct_check( missing_exprs: &[Expr], - missing_cols: &[Column], + missing_cols: &IndexSet, projection_exprs: &[Expr], ) -> Result<()> { if missing_exprs.is_empty() { @@ -677,15 +679,16 @@ impl LogicalPlanBuilder { let schema = self.plan.schema(); // Collect sort columns that are missing in the input plan's schema - let mut missing_cols: Vec = vec![]; + let mut missing_cols: IndexSet = IndexSet::new(); sorts.iter().try_for_each::<_, Result<()>>(|sort| { let columns = sort.expr.column_refs(); - columns.into_iter().for_each(|c| { - if !schema.has_column(c) { - missing_cols.push(c.clone()); - } - }); + missing_cols.extend( + columns + .into_iter() + .filter(|c| !schema.has_column(c)) + .cloned(), + ); Ok(()) })?; @@ -1324,6 +1327,25 @@ pub fn change_redundant_column(fields: &Fields) -> Vec { }) .collect() } + +fn mark_field(schema: &DFSchema) -> (Option, Arc) { + let mut table_references = schema + .iter() + .filter_map(|(qualifier, _)| qualifier) + .collect::>(); + table_references.dedup(); + let table_reference = if table_references.len() == 1 { + table_references.pop().cloned() + } else { + None + }; + + ( + table_reference, + Arc::new(Field::new("mark", DataType::Boolean, false)), + ) +} + /// Creates a schema for a join operation. 
/// The fields from the left side are first pub fn build_join_schema( @@ -1390,6 +1412,10 @@ pub fn build_join_schema( .map(|(q, f)| (q.cloned(), Arc::clone(f))) .collect() } + JoinType::LeftMark => left_fields + .map(|(q, f)| (q.cloned(), Arc::clone(f))) + .chain(once(mark_field(right))) + .collect(), JoinType::RightSemi | JoinType::RightAnti => { // Only use the right side for the schema right_fields diff --git a/datafusion/expr/src/logical_plan/display.rs b/datafusion/expr/src/logical_plan/display.rs index c0549451a776..9aea7747c414 100644 --- a/datafusion/expr/src/logical_plan/display.rs +++ b/datafusion/expr/src/logical_plan/display.rs @@ -20,10 +20,10 @@ use std::collections::HashMap; use std::fmt; use crate::{ - expr_vec_fmt, Aggregate, DescribeTable, Distinct, DistinctOn, DmlStatement, Expr, - Filter, Join, Limit, LogicalPlan, Partitioning, Prepare, Projection, RecursiveQuery, - Repartition, Sort, Subquery, SubqueryAlias, TableProviderFilterPushDown, TableScan, - Unnest, Values, Window, + expr_vec_fmt, Aggregate, DescribeTable, Distinct, DistinctOn, DmlStatement, Execute, + Expr, Filter, Join, Limit, LogicalPlan, Partitioning, Prepare, Projection, + RecursiveQuery, Repartition, Sort, Subquery, SubqueryAlias, + TableProviderFilterPushDown, TableScan, Unnest, Values, Window, }; use crate::dml::CopyTo; @@ -626,6 +626,15 @@ impl<'a, 'b> PgJsonVisitor<'a, 'b> { "Data Types": format!("{:?}", data_types) }) } + LogicalPlan::Execute(Execute { + name, parameters, .. + }) => { + json!({ + "Node Type": "Execute", + "Name": name, + "Parameters": expr_vec_fmt!(parameters), + }) + } LogicalPlan::DescribeTable(DescribeTable { .. 
}) => { json!({ "Node Type": "DescribeTable" diff --git a/datafusion/expr/src/logical_plan/mod.rs b/datafusion/expr/src/logical_plan/mod.rs index 80a896212442..59654a227829 100644 --- a/datafusion/expr/src/logical_plan/mod.rs +++ b/datafusion/expr/src/logical_plan/mod.rs @@ -36,7 +36,7 @@ pub use ddl::{ pub use dml::{DmlStatement, WriteOp}; pub use plan::{ projection_schema, Aggregate, Analyze, ColumnUnnestList, DescribeTable, Distinct, - DistinctOn, EmptyRelation, Explain, Extension, FetchType, Filter, Join, + DistinctOn, EmptyRelation, Execute, Explain, Extension, FetchType, Filter, Join, JoinConstraint, JoinType, Limit, LogicalPlan, Partitioning, PlanType, Prepare, Projection, RecursiveQuery, Repartition, SkipType, Sort, StringifiedPlan, Subquery, SubqueryAlias, TableScan, ToStringifiedPlan, Union, Unnest, Values, Window, diff --git a/datafusion/expr/src/logical_plan/plan.rs b/datafusion/expr/src/logical_plan/plan.rs index a301c48659d7..191a42e38e3a 100644 --- a/datafusion/expr/src/logical_plan/plan.rs +++ b/datafusion/expr/src/logical_plan/plan.rs @@ -266,6 +266,8 @@ pub enum LogicalPlan { /// Prepare a statement and find any bind parameters /// (e.g. `?`). This is used to implement SQL-prepared statements. Prepare(Prepare), + /// Execute a prepared statement. This is used to implement SQL 'EXECUTE'. + Execute(Execute), /// Data Manipulation Language (DML): Insert / Update / Delete Dml(DmlStatement), /// Data Definition Language (DDL): CREATE / DROP TABLES / VIEWS / SCHEMAS @@ -314,6 +316,7 @@ impl LogicalPlan { LogicalPlan::Subquery(Subquery { subquery, .. }) => subquery.schema(), LogicalPlan::SubqueryAlias(SubqueryAlias { schema, .. }) => schema, LogicalPlan::Prepare(Prepare { input, .. }) => input.schema(), + LogicalPlan::Execute(Execute { schema, .. 
}) => schema, LogicalPlan::Explain(explain) => &explain.schema, LogicalPlan::Analyze(analyze) => &analyze.schema, LogicalPlan::Extension(extension) => extension.node.schema(), @@ -457,6 +460,7 @@ impl LogicalPlan { | LogicalPlan::Statement { .. } | LogicalPlan::EmptyRelation { .. } | LogicalPlan::Values { .. } + | LogicalPlan::Execute { .. } | LogicalPlan::DescribeTable(_) => vec![], } } @@ -532,7 +536,9 @@ impl LogicalPlan { left.head_output_expr() } } - JoinType::LeftSemi | JoinType::LeftAnti => left.head_output_expr(), + JoinType::LeftSemi | JoinType::LeftAnti | JoinType::LeftMark => { + left.head_output_expr() + } JoinType::RightSemi | JoinType::RightAnti => right.head_output_expr(), }, LogicalPlan::RecursiveQuery(RecursiveQuery { static_term, .. }) => { @@ -558,6 +564,7 @@ impl LogicalPlan { LogicalPlan::Subquery(_) => Ok(None), LogicalPlan::EmptyRelation(_) | LogicalPlan::Prepare(_) + | LogicalPlan::Execute(_) | LogicalPlan::Statement(_) | LogicalPlan::Values(_) | LogicalPlan::Explain(_) @@ -710,6 +717,7 @@ impl LogicalPlan { LogicalPlan::Analyze(_) => Ok(self), LogicalPlan::Explain(_) => Ok(self), LogicalPlan::Prepare(_) => Ok(self), + LogicalPlan::Execute(_) => Ok(self), LogicalPlan::TableScan(_) => Ok(self), LogicalPlan::EmptyRelation(_) => Ok(self), LogicalPlan::Statement(_) => Ok(self), @@ -1070,6 +1078,14 @@ impl LogicalPlan { input: Arc::new(input), })) } + LogicalPlan::Execute(Execute { name, schema, .. 
}) => { + self.assert_no_inputs(inputs)?; + Ok(LogicalPlan::Execute(Execute { + name: name.clone(), + schema: Arc::clone(schema), + parameters: expr, + })) + } LogicalPlan::TableScan(ts) => { self.assert_no_inputs(inputs)?; Ok(LogicalPlan::TableScan(TableScan { @@ -1290,7 +1306,9 @@ impl LogicalPlan { _ => None, } } - JoinType::LeftSemi | JoinType::LeftAnti => left.max_rows(), + JoinType::LeftSemi | JoinType::LeftAnti | JoinType::LeftMark => { + left.max_rows() + } JoinType::RightSemi | JoinType::RightAnti => right.max_rows(), }, LogicalPlan::Repartition(Repartition { input, .. }) => input.max_rows(), @@ -1326,6 +1344,7 @@ impl LogicalPlan { | LogicalPlan::Copy(_) | LogicalPlan::DescribeTable(_) | LogicalPlan::Prepare(_) + | LogicalPlan::Execute(_) | LogicalPlan::Statement(_) | LogicalPlan::Extension(_) => None, } @@ -1929,6 +1948,9 @@ impl LogicalPlan { }) => { write!(f, "Prepare: {name:?} {data_types:?} ") } + LogicalPlan::Execute(Execute { name, parameters, .. }) => { + write!(f, "Execute: {} params=[{}]", name, expr_vec_fmt!(parameters)) + } LogicalPlan::DescribeTable(DescribeTable { .. }) => { write!(f, "DescribeTable") } @@ -2595,6 +2617,27 @@ pub struct Prepare { pub input: Arc, } +/// Execute a prepared statement. +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct Execute { + /// The name of the prepared statement to execute + pub name: String, + /// The execute parameters + pub parameters: Vec, + /// Dummy schema + pub schema: DFSchemaRef, +} + +// Comparison excludes the `schema` field. 
+impl PartialOrd for Execute { + fn partial_cmp(&self, other: &Self) -> Option { + match self.name.partial_cmp(&other.name) { + Some(Ordering::Equal) => self.parameters.partial_cmp(&other.parameters), + cmp => cmp, + } + } +} + /// Describe the schema of table /// /// # Example output: diff --git a/datafusion/expr/src/logical_plan/tree_node.rs b/datafusion/expr/src/logical_plan/tree_node.rs index 0658f7029740..ff2c1ec1d58f 100644 --- a/datafusion/expr/src/logical_plan/tree_node.rs +++ b/datafusion/expr/src/logical_plan/tree_node.rs @@ -38,10 +38,10 @@ //! * [`LogicalPlan::expressions`]: Return a copy of the plan's expressions use crate::{ dml::CopyTo, Aggregate, Analyze, CreateMemoryTable, CreateView, DdlStatement, - Distinct, DistinctOn, DmlStatement, Explain, Expr, Extension, Filter, Join, Limit, - LogicalPlan, Partitioning, Prepare, Projection, RecursiveQuery, Repartition, Sort, - Subquery, SubqueryAlias, TableScan, Union, Unnest, UserDefinedLogicalNode, Values, - Window, + Distinct, DistinctOn, DmlStatement, Execute, Explain, Expr, Extension, Filter, Join, + Limit, LogicalPlan, Partitioning, Prepare, Projection, RecursiveQuery, Repartition, + Sort, Subquery, SubqueryAlias, TableScan, Union, Unnest, UserDefinedLogicalNode, + Values, Window, }; use std::ops::Deref; use std::sync::Arc; @@ -363,6 +363,7 @@ impl TreeNode for LogicalPlan { | LogicalPlan::Statement { .. } | LogicalPlan::EmptyRelation { .. } | LogicalPlan::Values { .. } + | LogicalPlan::Execute { .. } | LogicalPlan::DescribeTable(_) => Transformed::no(self), }) } @@ -505,6 +506,9 @@ impl LogicalPlan { .chain(fetch.iter()) .map(|e| e.deref()) .apply_until_stop(f), + LogicalPlan::Execute(Execute { parameters, .. 
}) => { + parameters.iter().apply_until_stop(f) + } // plans without expressions LogicalPlan::EmptyRelation(_) | LogicalPlan::RecursiveQuery(_) @@ -734,6 +738,20 @@ impl LogicalPlan { }) }) } + LogicalPlan::Execute(Execute { + parameters, + name, + schema, + }) => parameters + .into_iter() + .map_until_stop_and_collect(f)? + .update_data(|parameters| { + LogicalPlan::Execute(Execute { + parameters, + name, + schema, + }) + }), // plans without expressions LogicalPlan::EmptyRelation(_) | LogicalPlan::Unnest(_) diff --git a/datafusion/expr/src/udf.rs b/datafusion/expr/src/udf.rs index 83563603f2f3..b5e9a555c2da 100644 --- a/datafusion/expr/src/udf.rs +++ b/datafusion/expr/src/udf.rs @@ -193,6 +193,7 @@ impl ScalarUDF { /// Invoke the function on `args`, returning the appropriate result. /// /// See [`ScalarUDFImpl::invoke`] for more details. + #[deprecated(since = "42.1.0", note = "Use `invoke_batch` instead")] pub fn invoke(&self, args: &[ColumnarValue]) -> Result { self.inner.invoke(args) } @@ -215,13 +216,14 @@ impl ScalarUDF { /// Invoke the function without `args` but number of rows, returning the appropriate result. /// /// See [`ScalarUDFImpl::invoke_no_args`] for more details. 
+ #[deprecated(since = "42.1.0", note = "Use `invoke_batch` instead")] pub fn invoke_no_args(&self, number_rows: usize) -> Result { self.inner.invoke_no_args(number_rows) } /// Returns a `ScalarFunctionImplementation` that can invoke the function /// during execution - #[deprecated(since = "42.0.0", note = "Use `invoke` or `invoke_no_args` instead")] + #[deprecated(since = "42.0.0", note = "Use `invoke_batch` instead")] pub fn fun(&self) -> ScalarFunctionImplementation { let captured = Arc::clone(&self.inner); Arc::new(move |args| captured.invoke(args)) diff --git a/datafusion/expr/src/udwf.rs b/datafusion/expr/src/udwf.rs index 6ab94c1e841a..124625280670 100644 --- a/datafusion/expr/src/udwf.rs +++ b/datafusion/expr/src/udwf.rs @@ -312,10 +312,7 @@ pub trait WindowUDFImpl: Debug + Send + Sync { /// Returns the expressions that are passed to the [`PartitionEvaluator`]. fn expressions(&self, expr_args: ExpressionArgs) -> Vec> { - expr_args - .input_exprs() - .first() - .map_or(vec![], |expr| vec![Arc::clone(expr)]) + expr_args.input_exprs().into() } /// Invoke the function, returning the [`PartitionEvaluator`] instance diff --git a/datafusion/ffi/Cargo.toml b/datafusion/ffi/Cargo.toml new file mode 100644 index 000000000000..119747342515 --- /dev/null +++ b/datafusion/ffi/Cargo.toml @@ -0,0 +1,51 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +[package] +name = "datafusion-ffi" +description = "Foreign Function Interface implementation for DataFusion" +readme = "README.md" +version = { workspace = true } +edition = { workspace = true } +homepage = { workspace = true } +repository = { workspace = true } +license = { workspace = true } +authors = { workspace = true } +# Specify MSRV here as `cargo msrv` doesn't support workspace version +rust-version = "1.76" + +[lints] +workspace = true + +[lib] +name = "datafusion_ffi" +path = "src/lib.rs" + +[dependencies] +abi_stable = "0.11.3" +arrow = { workspace = true, features = ["ffi"] } +async-ffi = { version = "0.5.0", features = ["abi_stable"] } +async-trait = { workspace = true } +datafusion = { workspace = true, default-features = false } +datafusion-proto = { workspace = true } +doc-comment = { workspace = true } +futures = { workspace = true } +log = { workspace = true } +prost = { workspace = true } + +[dev-dependencies] +tokio = { workspace = true } diff --git a/datafusion/ffi/README.md b/datafusion/ffi/README.md new file mode 100644 index 000000000000..ba4bb8b961a1 --- /dev/null +++ b/datafusion/ffi/README.md @@ -0,0 +1,81 @@ + + +# `datafusion-ffi`: Apache DataFusion Foreign Function Interface + +This crate contains code to allow interoperability of Apache [DataFusion] +with functions from other languages using a stable interface. + +See [API Docs] for details and examples. + +We expect this crate may be used by both sides of the FFI. 
This allows users +to create modules that can interoperate without the necessity of using the same +version of DataFusion. The driving use case has been the `datafusion-python` +repository, but many other use cases may exist. We envision at least two +use cases. + +1. `datafusion-python` which will use the FFI to provide external services such + as a `TableProvider` without needing to re-export the entire `datafusion-python` + code base. With `datafusion-ffi` these packages do not need `datafusion-python` + as a dependency at all. +2. Users may want to create a modular interface that allows runtime loading of + libraries. + +## Struct Layout + +In this crate we have a variety of structs which closely mimic the behavior of +their internal counterparts. In the following example, we will refer to the +`TableProvider`, but the same pattern exists for other structs. + +Each of the exposed structs in this crate is provided with a variant prefixed +with `Foreign`. This variant is designed to be used by the consumer of the +foreign code. The `Foreign` structs should _never_ access the `private_data` +fields. Instead they should only access the data returned through the function +calls defined on the `FFI_` structs. The second purpose of the `Foreign` +structs is to contain additional data that may be needed by the traits that +are implemented on them. Some of these traits require borrowing data which +can be far more convenient to store locally. + +For example, we have a struct `FFI_TableProvider` to give access to the +`TableProvider` functions like `table_type()` and `scan()`. If we write a +library that wishes to expose its `TableProvider`, then we can access the +private data that contains the Arc reference to the `TableProvider` via +`FFI_TableProvider`. This data is local to the library. + +If we have a program that accesses a `TableProvider` via FFI, then it +will use `ForeignTableProvider`.
When using `ForeignTableProvider` we **must** +not attempt to access the `private_data` field in `FFI_TableProvider`. If +you are testing locally, you may be able to successfully access this field, but +it will only work if you are building against the exact same version of +`DataFusion` for both libraries **and** the same compiler. It will not work +in general. + +It is worth noting that which library is the `local` and which is `foreign` +depends on which interface we are considering. For example, suppose we have a +Python library called `my_provider` that exposes a `TableProvider` called +`MyProvider` via `FFI_TableProvider`. Within the library `my_provider` we can +access the `private_data` via `FFI_TableProvider`. We connect this to +`datafusion-python`, where we access it as a `ForeignTableProvider`. Now when +we call `scan()` on this interface, we have to pass it a `FFI_SessionConfig`. +The `SessionConfig` is local to `datafusion-python` and **not** `my_provider`. +It is important to be careful when expanding these functions to be certain which +side of the interface each object refers to. + +[datafusion]: https://datafusion.apache.org +[api docs]: http://docs.rs/datafusion-ffi/latest diff --git a/datafusion/ffi/src/arrow_wrappers.rs b/datafusion/ffi/src/arrow_wrappers.rs new file mode 100644 index 000000000000..c5add8782c51 --- /dev/null +++ b/datafusion/ffi/src/arrow_wrappers.rs @@ -0,0 +1,70 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License.
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::sync::Arc; + +use abi_stable::StableAbi; +use arrow::{ + datatypes::{Schema, SchemaRef}, + ffi::{FFI_ArrowArray, FFI_ArrowSchema}, +}; +use log::error; + +/// This is a wrapper struct around FFI_ArrowSchema simply to indicate +/// to the StableAbi macros that the underlying struct is FFI safe. +#[repr(C)] +#[derive(Debug, StableAbi)] +pub struct WrappedSchema(#[sabi(unsafe_opaque_field)] pub FFI_ArrowSchema); + +impl From for WrappedSchema { + fn from(value: SchemaRef) -> Self { + let ffi_schema = match FFI_ArrowSchema::try_from(value.as_ref()) { + Ok(s) => s, + Err(e) => { + error!("Unable to convert DataFusion Schema to FFI_ArrowSchema in FFI_PlanProperties. {}", e); + FFI_ArrowSchema::empty() + } + }; + + WrappedSchema(ffi_schema) + } +} + +impl From for SchemaRef { + fn from(value: WrappedSchema) -> Self { + let schema = match Schema::try_from(&value.0) { + Ok(s) => s, + Err(e) => { + error!("Unable to convert from FFI_ArrowSchema to DataFusion Schema in FFI_PlanProperties. {}", e); + Schema::empty() + } + }; + Arc::new(schema) + } +} + +/// This is a wrapper struct for FFI_ArrowArray to indicate to StableAbi +/// that the struct is FFI Safe. For convenience, we also include the +/// schema needed to create a record batch from the array. 
+#[repr(C)] +#[derive(Debug, StableAbi)] +pub struct WrappedArray { + #[sabi(unsafe_opaque_field)] + pub array: FFI_ArrowArray, + + pub schema: WrappedSchema, +} diff --git a/datafusion/ffi/src/execution_plan.rs b/datafusion/ffi/src/execution_plan.rs new file mode 100644 index 000000000000..d10eda8990b8 --- /dev/null +++ b/datafusion/ffi/src/execution_plan.rs @@ -0,0 +1,361 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::{ffi::c_void, pin::Pin, sync::Arc}; + +use abi_stable::{ + std_types::{RResult, RString, RVec}, + StableAbi, +}; +use datafusion::error::Result; +use datafusion::{ + error::DataFusionError, + execution::{SendableRecordBatchStream, TaskContext}, + physical_plan::{DisplayAs, ExecutionPlan, PlanProperties}, +}; + +use crate::{ + plan_properties::FFI_PlanProperties, record_batch_stream::FFI_RecordBatchStream, +}; + +/// A stable struct for sharing a [`ExecutionPlan`] across FFI boundaries. 
+#[repr(C)] +#[derive(Debug, StableAbi)] +#[allow(non_camel_case_types)] +pub struct FFI_ExecutionPlan { + /// Return the plan properties + pub properties: unsafe extern "C" fn(plan: &Self) -> FFI_PlanProperties, + + /// Return a vector of children plans + pub children: unsafe extern "C" fn(plan: &Self) -> RVec, + + /// Return the plan name. + pub name: unsafe extern "C" fn(plan: &Self) -> RString, + + /// Execute the plan and return a record batch stream. Errors + /// will be returned as a string. + pub execute: unsafe extern "C" fn( + plan: &Self, + partition: usize, + ) -> RResult, + + /// Used to create a clone on the provider of the execution plan. This should + /// only need to be called by the receiver of the plan. + pub clone: unsafe extern "C" fn(plan: &Self) -> Self, + + /// Release the memory of the private data when it is no longer being used. + pub release: unsafe extern "C" fn(arg: &mut Self), + + /// Internal data. This is only to be accessed by the provider of the plan. + /// A [`ForeignExecutionPlan`] should never attempt to access this data. 
+ pub private_data: *mut c_void, +} + +unsafe impl Send for FFI_ExecutionPlan {} +unsafe impl Sync for FFI_ExecutionPlan {} + +pub struct ExecutionPlanPrivateData { + pub plan: Arc, + pub context: Arc, +} + +unsafe extern "C" fn properties_fn_wrapper( + plan: &FFI_ExecutionPlan, +) -> FFI_PlanProperties { + let private_data = plan.private_data as *const ExecutionPlanPrivateData; + let plan = &(*private_data).plan; + + plan.properties().into() +} + +unsafe extern "C" fn children_fn_wrapper( + plan: &FFI_ExecutionPlan, +) -> RVec { + let private_data = plan.private_data as *const ExecutionPlanPrivateData; + let plan = &(*private_data).plan; + let ctx = &(*private_data).context; + + let children: Vec<_> = plan + .children() + .into_iter() + .map(|child| FFI_ExecutionPlan::new(Arc::clone(child), Arc::clone(ctx))) + .collect(); + + children.into() +} + +unsafe extern "C" fn execute_fn_wrapper( + plan: &FFI_ExecutionPlan, + partition: usize, +) -> RResult { + let private_data = plan.private_data as *const ExecutionPlanPrivateData; + let plan = &(*private_data).plan; + let ctx = &(*private_data).context; + + match plan.execute(partition, Arc::clone(ctx)) { + Ok(rbs) => RResult::ROk(rbs.into()), + Err(e) => RResult::RErr( + format!("Error occurred during FFI_ExecutionPlan execute: {}", e).into(), + ), + } +} +unsafe extern "C" fn name_fn_wrapper(plan: &FFI_ExecutionPlan) -> RString { + let private_data = plan.private_data as *const ExecutionPlanPrivateData; + let plan = &(*private_data).plan; + + plan.name().into() +} + +unsafe extern "C" fn release_fn_wrapper(plan: &mut FFI_ExecutionPlan) { + let private_data = Box::from_raw(plan.private_data as *mut ExecutionPlanPrivateData); + drop(private_data); +} + +unsafe extern "C" fn clone_fn_wrapper(plan: &FFI_ExecutionPlan) -> FFI_ExecutionPlan { + let private_data = plan.private_data as *const ExecutionPlanPrivateData; + let plan_data = &(*private_data); + + FFI_ExecutionPlan::new(Arc::clone(&plan_data.plan), 
Arc::clone(&plan_data.context)) +} + +impl Clone for FFI_ExecutionPlan { + fn clone(&self) -> Self { + unsafe { (self.clone)(self) } + } +} + +impl FFI_ExecutionPlan { + /// This function is called on the provider's side. + pub fn new(plan: Arc, context: Arc) -> Self { + let private_data = Box::new(ExecutionPlanPrivateData { plan, context }); + + Self { + properties: properties_fn_wrapper, + children: children_fn_wrapper, + name: name_fn_wrapper, + execute: execute_fn_wrapper, + clone: clone_fn_wrapper, + release: release_fn_wrapper, + private_data: Box::into_raw(private_data) as *mut c_void, + } + } +} + +impl Drop for FFI_ExecutionPlan { + fn drop(&mut self) { + unsafe { (self.release)(self) } + } +} + +/// This struct is used to access an execution plan provided by a foreign +/// library across a FFI boundary. +/// +/// The ForeignExecutionPlan is to be used by the caller of the plan, so it has +/// no knowledge or access to the private data. All interaction with the plan +/// must occur through the functions defined in FFI_ExecutionPlan. 
+#[derive(Debug)] +pub struct ForeignExecutionPlan { + name: String, + plan: FFI_ExecutionPlan, + properties: PlanProperties, + children: Vec>, +} + +unsafe impl Send for ForeignExecutionPlan {} +unsafe impl Sync for ForeignExecutionPlan {} + +impl DisplayAs for ForeignExecutionPlan { + fn fmt_as( + &self, + _t: datafusion::physical_plan::DisplayFormatType, + f: &mut std::fmt::Formatter, + ) -> std::fmt::Result { + write!( + f, + "FFI_ExecutionPlan(number_of_children={})", + self.children.len(), + ) + } +} + +impl TryFrom<&FFI_ExecutionPlan> for ForeignExecutionPlan { + type Error = DataFusionError; + + fn try_from(plan: &FFI_ExecutionPlan) -> Result { + unsafe { + let name = (plan.name)(plan).into(); + + let properties: PlanProperties = (plan.properties)(plan).try_into()?; + + let children_rvec = (plan.children)(plan); + let children: Result> = children_rvec + .iter() + .map(ForeignExecutionPlan::try_from) + .map(|child| child.map(|c| Arc::new(c) as Arc)) + .collect(); + + Ok(Self { + name, + plan: plan.clone(), + properties, + children: children?, + }) + } + } +} + +impl ExecutionPlan for ForeignExecutionPlan { + fn name(&self) -> &str { + &self.name + } + + fn as_any(&self) -> &dyn std::any::Any { + self + } + + fn properties(&self) -> &PlanProperties { + &self.properties + } + + fn children(&self) -> Vec<&Arc> { + self.children + .iter() + .map(|p| p as &Arc) + .collect() + } + + fn with_new_children( + self: Arc, + children: Vec>, + ) -> Result> { + Ok(Arc::new(ForeignExecutionPlan { + plan: self.plan.clone(), + name: self.name.clone(), + children, + properties: self.properties.clone(), + })) + } + + fn execute( + &self, + partition: usize, + _context: Arc, + ) -> Result { + unsafe { + match (self.plan.execute)(&self.plan, partition) { + RResult::ROk(stream) => { + let stream = Pin::new(Box::new(stream)) as SendableRecordBatchStream; + Ok(stream) + } + RResult::RErr(e) => Err(DataFusionError::Execution(format!( + "Error occurred during FFI call to 
FFI_ExecutionPlan execute. {}", + e + ))), + } + } + } +} + +#[cfg(test)] +mod tests { + use datafusion::{physical_plan::Partitioning, prelude::SessionContext}; + + use super::*; + + #[derive(Debug)] + pub struct EmptyExec { + props: PlanProperties, + } + + impl EmptyExec { + pub fn new(schema: arrow::datatypes::SchemaRef) -> Self { + Self { + props: PlanProperties::new( + datafusion::physical_expr::EquivalenceProperties::new(schema), + Partitioning::UnknownPartitioning(3), + datafusion::physical_plan::ExecutionMode::Unbounded, + ), + } + } + } + + impl DisplayAs for EmptyExec { + fn fmt_as( + &self, + _t: datafusion::physical_plan::DisplayFormatType, + _f: &mut std::fmt::Formatter, + ) -> std::fmt::Result { + unimplemented!() + } + } + + impl ExecutionPlan for EmptyExec { + fn name(&self) -> &'static str { + "empty-exec" + } + + fn as_any(&self) -> &dyn std::any::Any { + self + } + + fn properties(&self) -> &PlanProperties { + &self.props + } + + fn children(&self) -> Vec<&Arc> { + vec![] + } + + fn with_new_children( + self: Arc, + _: Vec>, + ) -> Result> { + unimplemented!() + } + + fn execute( + &self, + _partition: usize, + _context: Arc, + ) -> Result { + unimplemented!() + } + + fn statistics(&self) -> Result { + unimplemented!() + } + } + + #[test] + fn test_round_trip_ffi_execution_plan() -> Result<()> { + use arrow::datatypes::{DataType, Field, Schema}; + let schema = + Arc::new(Schema::new(vec![Field::new("a", DataType::Float32, false)])); + let ctx = SessionContext::new(); + + let original_plan = Arc::new(EmptyExec::new(schema)); + let original_name = original_plan.name().to_string(); + + let local_plan = FFI_ExecutionPlan::new(original_plan, ctx.task_ctx()); + + let foreign_plan: ForeignExecutionPlan = (&local_plan).try_into()?; + + assert!(original_name == foreign_plan.name()); + + Ok(()) + } +} diff --git a/datafusion/ffi/src/lib.rs b/datafusion/ffi/src/lib.rs new file mode 100644 index 000000000000..4a74e65dc671 --- /dev/null +++ 
b/datafusion/ffi/src/lib.rs @@ -0,0 +1,29 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. +// Make cheap clones clear: https://github.com/apache/datafusion/issues/11143 +#![deny(clippy::clone_on_ref_ptr)] + +pub mod arrow_wrappers; +pub mod execution_plan; +pub mod plan_properties; +pub mod record_batch_stream; +pub mod session_config; +pub mod table_provider; +pub mod table_source; + +#[cfg(doctest)] +doc_comment::doctest!("../README.md", readme_example_test); diff --git a/datafusion/ffi/src/plan_properties.rs b/datafusion/ffi/src/plan_properties.rs new file mode 100644 index 000000000000..722681ae4a1d --- /dev/null +++ b/datafusion/ffi/src/plan_properties.rs @@ -0,0 +1,297 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::{ffi::c_void, sync::Arc}; + +use abi_stable::{ + std_types::{ + RResult::{self, RErr, ROk}, + RStr, RVec, + }, + StableAbi, +}; +use arrow::datatypes::SchemaRef; +use datafusion::{ + error::{DataFusionError, Result}, + physical_expr::EquivalenceProperties, + physical_plan::{ExecutionMode, PlanProperties}, + prelude::SessionContext, +}; +use datafusion_proto::{ + physical_plan::{ + from_proto::{parse_physical_sort_exprs, parse_protobuf_partitioning}, + to_proto::{serialize_partitioning, serialize_physical_sort_exprs}, + DefaultPhysicalExtensionCodec, + }, + protobuf::{Partitioning, PhysicalSortExprNodeCollection}, +}; +use prost::Message; + +use crate::arrow_wrappers::WrappedSchema; + +/// A stable struct for sharing [`PlanProperties`] across FFI boundaries. +#[repr(C)] +#[derive(Debug, StableAbi)] +#[allow(non_camel_case_types)] +pub struct FFI_PlanProperties { + /// The output partitioning is a [`Partitioning`] protobuf message serialized + /// into bytes to pass across the FFI boundary. + pub output_partitioning: + unsafe extern "C" fn(plan: &Self) -> RResult, RStr<'static>>, + + /// Return the execution mode of the plan. + pub execution_mode: unsafe extern "C" fn(plan: &Self) -> FFI_ExecutionMode, + + /// The output ordering is a [`PhysicalSortExprNodeCollection`] protobuf message + /// serialized into bytes to pass across the FFI boundary. + pub output_ordering: + unsafe extern "C" fn(plan: &Self) -> RResult, RStr<'static>>, + + /// Return the schema of the plan. 
+ pub schema: unsafe extern "C" fn(plan: &Self) -> WrappedSchema, + + /// Release the memory of the private data when it is no longer being used. + pub release: unsafe extern "C" fn(arg: &mut Self), + + /// Internal data. This is only to be accessed by the provider of the plan. + /// The foreign library should never attempt to access this data. + pub private_data: *mut c_void, +} + +struct PlanPropertiesPrivateData { + props: PlanProperties, +} + +unsafe extern "C" fn output_partitioning_fn_wrapper( + properties: &FFI_PlanProperties, +) -> RResult, RStr<'static>> { + let private_data = properties.private_data as *const PlanPropertiesPrivateData; + let props = &(*private_data).props; + + let codec = DefaultPhysicalExtensionCodec {}; + let partitioning_data = + match serialize_partitioning(props.output_partitioning(), &codec) { + Ok(p) => p, + Err(_) => { + return RErr( + "unable to serialize output_partitioning in FFI_PlanProperties" + .into(), + ) + } + }; + let output_partitioning = partitioning_data.encode_to_vec(); + + ROk(output_partitioning.into()) +} + +unsafe extern "C" fn execution_mode_fn_wrapper( + properties: &FFI_PlanProperties, +) -> FFI_ExecutionMode { + let private_data = properties.private_data as *const PlanPropertiesPrivateData; + let props = &(*private_data).props; + props.execution_mode().into() +} + +unsafe extern "C" fn output_ordering_fn_wrapper( + properties: &FFI_PlanProperties, +) -> RResult, RStr<'static>> { + let private_data = properties.private_data as *const PlanPropertiesPrivateData; + let props = &(*private_data).props; + + let codec = DefaultPhysicalExtensionCodec {}; + let output_ordering = + match props.output_ordering() { + Some(ordering) => { + let physical_sort_expr_nodes = + match serialize_physical_sort_exprs(ordering.to_owned(), &codec) { + Ok(v) => v, + Err(_) => return RErr( + "unable to serialize output_ordering in FFI_PlanProperties" + .into(), + ), + }; + + let ordering_data = PhysicalSortExprNodeCollection { + 
physical_sort_expr_nodes, + }; + + ordering_data.encode_to_vec() + } + None => Vec::default(), + }; + ROk(output_ordering.into()) +} + +unsafe extern "C" fn schema_fn_wrapper(properties: &FFI_PlanProperties) -> WrappedSchema { + let private_data = properties.private_data as *const PlanPropertiesPrivateData; + let props = &(*private_data).props; + + let schema: SchemaRef = Arc::clone(props.eq_properties.schema()); + schema.into() +} + +unsafe extern "C" fn release_fn_wrapper(props: &mut FFI_PlanProperties) { + let private_data = + Box::from_raw(props.private_data as *mut PlanPropertiesPrivateData); + drop(private_data); +} + +impl Drop for FFI_PlanProperties { + fn drop(&mut self) { + unsafe { (self.release)(self) } + } +} + +impl From<&PlanProperties> for FFI_PlanProperties { + fn from(props: &PlanProperties) -> Self { + let private_data = Box::new(PlanPropertiesPrivateData { + props: props.clone(), + }); + + FFI_PlanProperties { + output_partitioning: output_partitioning_fn_wrapper, + execution_mode: execution_mode_fn_wrapper, + output_ordering: output_ordering_fn_wrapper, + schema: schema_fn_wrapper, + release: release_fn_wrapper, + private_data: Box::into_raw(private_data) as *mut c_void, + } + } +} + +impl TryFrom for PlanProperties { + type Error = DataFusionError; + + fn try_from(ffi_props: FFI_PlanProperties) -> Result { + let ffi_schema = unsafe { (ffi_props.schema)(&ffi_props) }; + let schema = (&ffi_schema.0).try_into()?; + + // TODO Extend FFI to get the registry and codex + let default_ctx = SessionContext::new(); + let codex = DefaultPhysicalExtensionCodec {}; + + let ffi_orderings = unsafe { (ffi_props.output_ordering)(&ffi_props) }; + let orderings = match ffi_orderings { + ROk(ordering_vec) => { + let proto_output_ordering = + PhysicalSortExprNodeCollection::decode(ordering_vec.as_ref()) + .map_err(|e| DataFusionError::External(Box::new(e)))?; + Some(parse_physical_sort_exprs( + &proto_output_ordering.physical_sort_expr_nodes, + &default_ctx, + 
&schema, + &codex, + )?) + } + RErr(e) => return Err(DataFusionError::Plan(e.to_string())), + }; + + let ffi_partitioning = unsafe { (ffi_props.output_partitioning)(&ffi_props) }; + let partitioning = match ffi_partitioning { + ROk(partitioning_vec) => { + let proto_output_partitioning = + Partitioning::decode(partitioning_vec.as_ref()) + .map_err(|e| DataFusionError::External(Box::new(e)))?; + parse_protobuf_partitioning( + Some(&proto_output_partitioning), + &default_ctx, + &schema, + &codex, + )? + .ok_or(DataFusionError::Plan( + "Unable to deserialize partitioning protobuf in FFI_PlanProperties" + .to_string(), + )) + } + RErr(e) => Err(DataFusionError::Plan(e.to_string())), + }?; + + let execution_mode: ExecutionMode = + unsafe { (ffi_props.execution_mode)(&ffi_props).into() }; + + let eq_properties = match orderings { + Some(ordering) => { + EquivalenceProperties::new_with_orderings(Arc::new(schema), &[ordering]) + } + None => EquivalenceProperties::new(Arc::new(schema)), + }; + + Ok(PlanProperties::new( + eq_properties, + partitioning, + execution_mode, + )) + } +} + +/// FFI safe version of [`ExecutionMode`]. 
+#[repr(C)] +#[allow(non_camel_case_types)] +#[derive(Clone, StableAbi)] +pub enum FFI_ExecutionMode { + Bounded, + Unbounded, + PipelineBreaking, +} + +impl From for FFI_ExecutionMode { + fn from(value: ExecutionMode) -> Self { + match value { + ExecutionMode::Bounded => FFI_ExecutionMode::Bounded, + ExecutionMode::Unbounded => FFI_ExecutionMode::Unbounded, + ExecutionMode::PipelineBreaking => FFI_ExecutionMode::PipelineBreaking, + } + } +} + +impl From for ExecutionMode { + fn from(value: FFI_ExecutionMode) -> Self { + match value { + FFI_ExecutionMode::Bounded => ExecutionMode::Bounded, + FFI_ExecutionMode::Unbounded => ExecutionMode::Unbounded, + FFI_ExecutionMode::PipelineBreaking => ExecutionMode::PipelineBreaking, + } + } +} + +#[cfg(test)] +mod tests { + use datafusion::physical_plan::Partitioning; + + use super::*; + + #[test] + fn test_round_trip_ffi_plan_properties() -> Result<()> { + use arrow::datatypes::{DataType, Field, Schema}; + let schema = + Arc::new(Schema::new(vec![Field::new("a", DataType::Float32, false)])); + + let original_props = PlanProperties::new( + EquivalenceProperties::new(schema), + Partitioning::UnknownPartitioning(3), + ExecutionMode::Unbounded, + ); + + let local_props_ptr = FFI_PlanProperties::from(&original_props); + + let foreign_props: PlanProperties = local_props_ptr.try_into()?; + + assert!(format!("{:?}", foreign_props) == format!("{:?}", original_props)); + + Ok(()) + } +} diff --git a/datafusion/ffi/src/record_batch_stream.rs b/datafusion/ffi/src/record_batch_stream.rs new file mode 100644 index 000000000000..c944e56c5cde --- /dev/null +++ b/datafusion/ffi/src/record_batch_stream.rs @@ -0,0 +1,176 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::{ffi::c_void, task::Poll}; + +use abi_stable::{ + std_types::{ROption, RResult, RString}, + StableAbi, +}; +use arrow::array::{Array, RecordBatch}; +use arrow::{ + array::{make_array, StructArray}, + ffi::{from_ffi, to_ffi}, +}; +use async_ffi::{ContextExt, FfiContext, FfiPoll}; +use datafusion::error::Result; +use datafusion::{ + error::DataFusionError, + execution::{RecordBatchStream, SendableRecordBatchStream}, +}; +use futures::{Stream, TryStreamExt}; + +use crate::arrow_wrappers::{WrappedArray, WrappedSchema}; + +/// A stable struct for sharing [`RecordBatchStream`] across FFI boundaries. +/// We use the async-ffi crate for handling async calls across libraries. +#[repr(C)] +#[derive(Debug, StableAbi)] +#[allow(non_camel_case_types)] +pub struct FFI_RecordBatchStream { + /// This mirrors the `poll_next` of [`RecordBatchStream`] but does so + /// in a FFI safe manner. + pub poll_next: + unsafe extern "C" fn( + stream: &Self, + cx: &mut FfiContext, + ) -> FfiPoll>>, + + /// Return the schema of the record batch + pub schema: unsafe extern "C" fn(stream: &Self) -> WrappedSchema, + + /// Internal data. This is only to be accessed by the provider of the plan. + /// The foreign library should never attempt to access this data. 
+ pub private_data: *mut c_void, +} + +impl From for FFI_RecordBatchStream { + fn from(stream: SendableRecordBatchStream) -> Self { + FFI_RecordBatchStream { + poll_next: poll_next_fn_wrapper, + schema: schema_fn_wrapper, + private_data: Box::into_raw(Box::new(stream)) as *mut c_void, + } + } +} + +unsafe impl Send for FFI_RecordBatchStream {} + +unsafe extern "C" fn schema_fn_wrapper(stream: &FFI_RecordBatchStream) -> WrappedSchema { + let stream = stream.private_data as *const SendableRecordBatchStream; + + (*stream).schema().into() +} + +fn record_batch_to_wrapped_array( + record_batch: RecordBatch, +) -> RResult { + let struct_array = StructArray::from(record_batch); + match to_ffi(&struct_array.to_data()) { + Ok((array, schema)) => RResult::ROk(WrappedArray { + array, + schema: WrappedSchema(schema), + }), + Err(e) => RResult::RErr(e.to_string().into()), + } +} + +// probably want to use pub unsafe fn from_ffi(array: FFI_ArrowArray, schema: &FFI_ArrowSchema) -> Result { +fn maybe_record_batch_to_wrapped_stream( + record_batch: Option>, +) -> ROption> { + match record_batch { + Some(Ok(record_batch)) => { + ROption::RSome(record_batch_to_wrapped_array(record_batch)) + } + Some(Err(e)) => ROption::RSome(RResult::RErr(e.to_string().into())), + None => ROption::RNone, + } +} + +unsafe extern "C" fn poll_next_fn_wrapper( + stream: &FFI_RecordBatchStream, + cx: &mut FfiContext, +) -> FfiPoll>> { + let stream = stream.private_data as *mut SendableRecordBatchStream; + + let poll_result = cx.with_context(|std_cx| { + (*stream) + .try_poll_next_unpin(std_cx) + .map(maybe_record_batch_to_wrapped_stream) + }); + + poll_result.into() +} + +impl RecordBatchStream for FFI_RecordBatchStream { + fn schema(&self) -> arrow::datatypes::SchemaRef { + let wrapped_schema = unsafe { (self.schema)(self) }; + wrapped_schema.into() + } +} + +fn wrapped_array_to_record_batch(array: WrappedArray) -> Result { + let array_data = + unsafe { from_ffi(array.array, 
&array.schema.0).map_err(DataFusionError::from)? }; + let array = make_array(array_data); + let struct_array = array + .as_any() + .downcast_ref::() + .ok_or(DataFusionError::Execution( + "Unexpected array type during record batch collection in FFI_RecordBatchStream" + .to_string(), + ))?; + + Ok(struct_array.into()) +} + +fn maybe_wrapped_array_to_record_batch( + array: ROption>, +) -> Option> { + match array { + ROption::RSome(RResult::ROk(wrapped_array)) => { + Some(wrapped_array_to_record_batch(wrapped_array)) + } + ROption::RSome(RResult::RErr(e)) => { + Some(Err(DataFusionError::Execution(e.to_string()))) + } + ROption::RNone => None, + } +} + +impl Stream for FFI_RecordBatchStream { + type Item = Result; + + fn poll_next( + self: std::pin::Pin<&mut Self>, + cx: &mut std::task::Context<'_>, + ) -> Poll> { + let poll_result = + unsafe { cx.with_ffi_context(|ffi_cx| (self.poll_next)(&self, ffi_cx)) }; + + match poll_result { + FfiPoll::Ready(array) => { + Poll::Ready(maybe_wrapped_array_to_record_batch(array)) + } + FfiPoll::Pending => Poll::Pending, + FfiPoll::Panicked => Poll::Ready(Some(Err(DataFusionError::Execution( + "Error occurred during poll_next on FFI_RecordBatchStream".to_string(), + )))), + } + } +} diff --git a/datafusion/ffi/src/session_config.rs b/datafusion/ffi/src/session_config.rs new file mode 100644 index 000000000000..aea03cf94e0a --- /dev/null +++ b/datafusion/ffi/src/session_config.rs @@ -0,0 +1,187 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::{ + collections::HashMap, + ffi::{c_char, c_void, CString}, +}; + +use abi_stable::{ + std_types::{RHashMap, RString}, + StableAbi, +}; +use datafusion::{config::ConfigOptions, error::Result}; +use datafusion::{error::DataFusionError, prelude::SessionConfig}; + +/// A stable struct for sharing [`SessionConfig`] across FFI boundaries. +/// Instead of attempting to expose the entire SessionConfig interface, we +/// convert the config options into a map from a string to string and pass +/// those values across the FFI boundary. On the receiver side, we +/// reconstruct a SessionConfig from those values. +/// +/// It is possible that using different versions of DataFusion across the +/// FFI boundary could have differing expectations of the config options. +/// This is a limitation of this approach, but exposing the entire +/// SessionConfig via a FFI interface would be extensive and provide limited +/// value over this version. +#[repr(C)] +#[derive(Debug, StableAbi)] +#[allow(non_camel_case_types)] +pub struct FFI_SessionConfig { + /// Return a hash map from key to value of the config options represented + /// by string values. + pub config_options: unsafe extern "C" fn(config: &Self) -> RHashMap, + + /// Used to create a clone on the provider of the execution plan. This should + /// only need to be called by the receiver of the plan. + pub clone: unsafe extern "C" fn(plan: &Self) -> Self, + + /// Release the memory of the private data when it is no longer being used. 
+ pub release: unsafe extern "C" fn(arg: &mut Self), + + /// Internal data. This is only to be accessed by the provider of the plan. + /// A [`ForeignSessionConfig`] should never attempt to access this data. + pub private_data: *mut c_void, +} + +unsafe impl Send for FFI_SessionConfig {} +unsafe impl Sync for FFI_SessionConfig {} + +unsafe extern "C" fn config_options_fn_wrapper( + config: &FFI_SessionConfig, +) -> RHashMap { + let private_data = config.private_data as *mut SessionConfigPrivateData; + let config_options = &(*private_data).config; + + let mut options = RHashMap::default(); + for config_entry in config_options.entries() { + if let Some(value) = config_entry.value { + options.insert(config_entry.key.into(), value.into()); + } + } + + options +} + +unsafe extern "C" fn release_fn_wrapper(config: &mut FFI_SessionConfig) { + let private_data = + Box::from_raw(config.private_data as *mut SessionConfigPrivateData); + drop(private_data); +} + +unsafe extern "C" fn clone_fn_wrapper(config: &FFI_SessionConfig) -> FFI_SessionConfig { + let old_private_data = config.private_data as *mut SessionConfigPrivateData; + let old_config = &(*old_private_data).config; + + let private_data = Box::new(SessionConfigPrivateData { + config: old_config.clone(), + }); + + FFI_SessionConfig { + config_options: config_options_fn_wrapper, + private_data: Box::into_raw(private_data) as *mut c_void, + clone: clone_fn_wrapper, + release: release_fn_wrapper, + } +} + +struct SessionConfigPrivateData { + pub config: ConfigOptions, +} + +impl From<&SessionConfig> for FFI_SessionConfig { + fn from(session: &SessionConfig) -> Self { + let mut config_keys = Vec::new(); + let mut config_values = Vec::new(); + for config_entry in session.options().entries() { + if let Some(value) = config_entry.value { + let key_cstr = CString::new(config_entry.key).unwrap_or_default(); + let key_ptr = key_cstr.into_raw() as *const c_char; + config_keys.push(key_ptr); + + config_values + 
.push(CString::new(value).unwrap_or_default().into_raw() + as *const c_char); + } + } + + let private_data = Box::new(SessionConfigPrivateData { + config: session.options().clone(), + }); + + Self { + config_options: config_options_fn_wrapper, + private_data: Box::into_raw(private_data) as *mut c_void, + clone: clone_fn_wrapper, + release: release_fn_wrapper, + } + } +} + +impl Clone for FFI_SessionConfig { + fn clone(&self) -> Self { + unsafe { (self.clone)(self) } + } +} + +impl Drop for FFI_SessionConfig { + fn drop(&mut self) { + unsafe { (self.release)(self) }; + } +} + +/// A wrapper struct for accessing [`SessionConfig`] across a FFI boundary. +/// The [`SessionConfig`] will be generated from a hash map of the config +/// options in the provider and will be reconstructed on this side of the +/// interface.s +pub struct ForeignSessionConfig(pub SessionConfig); + +impl TryFrom<&FFI_SessionConfig> for ForeignSessionConfig { + type Error = DataFusionError; + + fn try_from(config: &FFI_SessionConfig) -> Result { + let config_options = unsafe { (config.config_options)(config) }; + + let mut options_map = HashMap::new(); + config_options.iter().for_each(|kv_pair| { + options_map.insert(kv_pair.0.to_string(), kv_pair.1.to_string()); + }); + + Ok(Self(SessionConfig::from_string_hash_map(&options_map)?)) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_round_trip_ffi_session_config() -> Result<()> { + let session_config = SessionConfig::new(); + let original_options = session_config.options().entries(); + + let ffi_config: FFI_SessionConfig = (&session_config).into(); + + let foreign_config: ForeignSessionConfig = (&ffi_config).try_into()?; + + let returned_options = foreign_config.0.options().entries(); + + assert!(original_options.len() == returned_options.len()); + + Ok(()) + } +} diff --git a/datafusion/ffi/src/table_provider.rs b/datafusion/ffi/src/table_provider.rs new file mode 100644 index 000000000000..011ad96e423d --- /dev/null +++ 
b/datafusion/ffi/src/table_provider.rs @@ -0,0 +1,443 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::{any::Any, ffi::c_void, sync::Arc}; + +use abi_stable::{ + std_types::{ROption, RResult, RString, RVec}, + StableAbi, +}; +use arrow::datatypes::SchemaRef; +use async_ffi::{FfiFuture, FutureExt}; +use async_trait::async_trait; +use datafusion::{ + catalog::{Session, TableProvider}, + datasource::TableType, + error::DataFusionError, + execution::session_state::SessionStateBuilder, + logical_expr::TableProviderFilterPushDown, + physical_plan::ExecutionPlan, + prelude::{Expr, SessionContext}, +}; +use datafusion_proto::{ + logical_plan::{ + from_proto::parse_exprs, to_proto::serialize_exprs, DefaultLogicalExtensionCodec, + }, + protobuf::LogicalExprList, +}; +use prost::Message; + +use crate::{ + arrow_wrappers::WrappedSchema, + session_config::ForeignSessionConfig, + table_source::{FFI_TableProviderFilterPushDown, FFI_TableType}, +}; + +use super::{ + execution_plan::{FFI_ExecutionPlan, ForeignExecutionPlan}, + session_config::FFI_SessionConfig, +}; +use datafusion::error::Result; + +/// A stable struct for sharing [`TableProvider`] across FFI boundaries. 
+#[repr(C)] +#[derive(Debug, StableAbi)] +#[allow(non_camel_case_types)] +pub struct FFI_TableProvider { + /// Return the table schema + pub schema: unsafe extern "C" fn(provider: &Self) -> WrappedSchema, + + /// Perform a scan on the table. See [`TableProvider`] for detailed usage information. + /// + /// # Arguments + /// + /// * `provider` - the table provider + /// * `session_config` - session configuration + /// * `projections` - if specified, only a subset of the columns are returned + /// * `filters_serialized` - filters to apply to the scan, which are a + /// [`LogicalExprList`] protobuf message serialized into bytes to pass + /// across the FFI boundary. + /// * `limit` - if specified, limit the number of rows returned + pub scan: unsafe extern "C" fn( + provider: &Self, + session_config: &FFI_SessionConfig, + projections: RVec, + filters_serialized: RVec, + limit: ROption, + ) -> FfiFuture>, + + /// Return the type of table. See [`TableType`] for options. + pub table_type: unsafe extern "C" fn(provider: &Self) -> FFI_TableType, + + /// Based upon the input filters, identify which are supported. The filters + /// are a [`LogicalExprList`] protobuf message serialized into bytes to pass + /// across the FFI boundary. + pub supports_filters_pushdown: Option< + unsafe extern "C" fn( + provider: &FFI_TableProvider, + filters_serialized: RVec, + ) + -> RResult, RString>, + >, + + /// Used to create a clone on the provider of the execution plan. This should + /// only need to be called by the receiver of the plan. + pub clone: unsafe extern "C" fn(plan: &Self) -> Self, + + /// Release the memory of the private data when it is no longer being used. + pub release: unsafe extern "C" fn(arg: &mut Self), + + /// Internal data. This is only to be accessed by the provider of the plan. + /// A [`ForeignExecutionPlan`] should never attempt to access this data. 
+ pub private_data: *mut c_void, +} + +unsafe impl Send for FFI_TableProvider {} +unsafe impl Sync for FFI_TableProvider {} + +struct ProviderPrivateData { + provider: Arc, +} + +unsafe extern "C" fn schema_fn_wrapper(provider: &FFI_TableProvider) -> WrappedSchema { + let private_data = provider.private_data as *const ProviderPrivateData; + let provider = &(*private_data).provider; + + provider.schema().into() +} + +unsafe extern "C" fn table_type_fn_wrapper( + provider: &FFI_TableProvider, +) -> FFI_TableType { + let private_data = provider.private_data as *const ProviderPrivateData; + let provider = &(*private_data).provider; + + provider.table_type().into() +} + +fn supports_filters_pushdown_internal( + provider: &Arc, + filters_serialized: &[u8], +) -> Result> { + let default_ctx = SessionContext::new(); + let codec = DefaultLogicalExtensionCodec {}; + + let filters = match filters_serialized.is_empty() { + true => vec![], + false => { + let proto_filters = LogicalExprList::decode(filters_serialized) + .map_err(|e| DataFusionError::Plan(e.to_string()))?; + + parse_exprs(proto_filters.expr.iter(), &default_ctx, &codec)? + } + }; + let filters_borrowed: Vec<&Expr> = filters.iter().collect(); + + let results: RVec<_> = provider + .supports_filters_pushdown(&filters_borrowed)? 
+ .iter() + .map(|v| v.into()) + .collect(); + + Ok(results) +} + +unsafe extern "C" fn supports_filters_pushdown_fn_wrapper( + provider: &FFI_TableProvider, + filters_serialized: RVec, +) -> RResult, RString> { + let private_data = provider.private_data as *const ProviderPrivateData; + let provider = &(*private_data).provider; + + supports_filters_pushdown_internal(provider, &filters_serialized) + .map_err(|e| e.to_string().into()) + .into() +} + +unsafe extern "C" fn scan_fn_wrapper( + provider: &FFI_TableProvider, + session_config: &FFI_SessionConfig, + projections: RVec, + filters_serialized: RVec, + limit: ROption, +) -> FfiFuture> { + let private_data = provider.private_data as *mut ProviderPrivateData; + let internal_provider = &(*private_data).provider; + let session_config = session_config.clone(); + + async move { + let config = match ForeignSessionConfig::try_from(&session_config) { + Ok(c) => c, + Err(e) => return RResult::RErr(e.to_string().into()), + }; + let session = SessionStateBuilder::new() + .with_default_features() + .with_config(config.0) + .build(); + let ctx = SessionContext::new_with_state(session); + + let filters = match filters_serialized.is_empty() { + true => vec![], + false => { + let default_ctx = SessionContext::new(); + let codec = DefaultLogicalExtensionCodec {}; + + let proto_filters = + match LogicalExprList::decode(filters_serialized.as_ref()) { + Ok(f) => f, + Err(e) => return RResult::RErr(e.to_string().into()), + }; + + match parse_exprs(proto_filters.expr.iter(), &default_ctx, &codec) { + Ok(f) => f, + Err(e) => return RResult::RErr(e.to_string().into()), + } + } + }; + + let projections: Vec<_> = projections.into_iter().collect(); + let maybe_projections = match projections.is_empty() { + true => None, + false => Some(&projections), + }; + + let plan = match internal_provider + .scan(&ctx.state(), maybe_projections, &filters, limit.into()) + .await + { + Ok(p) => p, + Err(e) => return RResult::RErr(e.to_string().into()), + 
}; + + RResult::ROk(FFI_ExecutionPlan::new(plan, ctx.task_ctx())) + } + .into_ffi() +} + +unsafe extern "C" fn release_fn_wrapper(provider: &mut FFI_TableProvider) { + let private_data = Box::from_raw(provider.private_data as *mut ProviderPrivateData); + drop(private_data); +} + +unsafe extern "C" fn clone_fn_wrapper(provider: &FFI_TableProvider) -> FFI_TableProvider { + let old_private_data = provider.private_data as *const ProviderPrivateData; + + let private_data = Box::into_raw(Box::new(ProviderPrivateData { + provider: Arc::clone(&(*old_private_data).provider), + })) as *mut c_void; + + FFI_TableProvider { + schema: schema_fn_wrapper, + scan: scan_fn_wrapper, + table_type: table_type_fn_wrapper, + supports_filters_pushdown: provider.supports_filters_pushdown, + clone: clone_fn_wrapper, + release: release_fn_wrapper, + private_data, + } +} + +impl Drop for FFI_TableProvider { + fn drop(&mut self) { + unsafe { (self.release)(self) } + } +} + +impl FFI_TableProvider { + /// Creates a new [`FFI_TableProvider`]. + pub fn new( + provider: Arc, + can_support_pushdown_filters: bool, + ) -> Self { + let private_data = Box::new(ProviderPrivateData { provider }); + + Self { + schema: schema_fn_wrapper, + scan: scan_fn_wrapper, + table_type: table_type_fn_wrapper, + supports_filters_pushdown: match can_support_pushdown_filters { + true => Some(supports_filters_pushdown_fn_wrapper), + false => None, + }, + clone: clone_fn_wrapper, + release: release_fn_wrapper, + private_data: Box::into_raw(private_data) as *mut c_void, + } + } +} + +/// This wrapper struct exists on the reciever side of the FFI interface, so it has +/// no guarantees about being able to access the data in `private_data`. Any functions +/// defined on this struct must only use the stable functions provided in +/// FFI_TableProvider to interact with the foreign table provider. 
+#[derive(Debug)] +pub struct ForeignTableProvider(FFI_TableProvider); + +unsafe impl Send for ForeignTableProvider {} +unsafe impl Sync for ForeignTableProvider {} + +impl From<&FFI_TableProvider> for ForeignTableProvider { + fn from(provider: &FFI_TableProvider) -> Self { + Self(provider.clone()) + } +} + +impl Clone for FFI_TableProvider { + fn clone(&self) -> Self { + unsafe { (self.clone)(self) } + } +} + +#[async_trait] +impl TableProvider for ForeignTableProvider { + fn as_any(&self) -> &dyn Any { + self + } + + fn schema(&self) -> SchemaRef { + let wrapped_schema = unsafe { (self.0.schema)(&self.0) }; + wrapped_schema.into() + } + + fn table_type(&self) -> TableType { + unsafe { (self.0.table_type)(&self.0).into() } + } + + async fn scan( + &self, + session: &dyn Session, + projection: Option<&Vec>, + filters: &[Expr], + limit: Option, + ) -> Result> { + let session_config: FFI_SessionConfig = session.config().into(); + + let projections: Option> = + projection.map(|p| p.iter().map(|v| v.to_owned()).collect()); + + let codec = DefaultLogicalExtensionCodec {}; + let filter_list = LogicalExprList { + expr: serialize_exprs(filters, &codec)?, + }; + let filters_serialized = filter_list.encode_to_vec().into(); + + let plan = unsafe { + let maybe_plan = (self.0.scan)( + &self.0, + &session_config, + projections.unwrap_or_default(), + filters_serialized, + limit.into(), + ) + .await; + + match maybe_plan { + RResult::ROk(p) => ForeignExecutionPlan::try_from(&p)?, + RResult::RErr(_) => { + return Err(DataFusionError::Internal( + "Unable to perform scan via FFI".to_string(), + )) + } + } + }; + + Ok(Arc::new(plan)) + } + + /// Tests whether the table provider can make use of a filter expression + /// to optimise data retrieval. 
+ fn supports_filters_pushdown( + &self, + filters: &[&Expr], + ) -> Result> { + unsafe { + let pushdown_fn = match self.0.supports_filters_pushdown { + Some(func) => func, + None => { + return Ok(vec![ + TableProviderFilterPushDown::Unsupported; + filters.len() + ]) + } + }; + + let codec = DefaultLogicalExtensionCodec {}; + + let expr_list = LogicalExprList { + expr: serialize_exprs(filters.iter().map(|f| f.to_owned()), &codec)?, + }; + let serialized_filters = expr_list.encode_to_vec(); + + let pushdowns = pushdown_fn(&self.0, serialized_filters.into()); + + match pushdowns { + RResult::ROk(p) => Ok(p.iter().map(|v| v.into()).collect()), + RResult::RErr(e) => Err(DataFusionError::Plan(e.to_string())), + } + } + } +} + +#[cfg(test)] +mod tests { + use arrow::datatypes::Schema; + use datafusion::prelude::{col, lit}; + + use super::*; + + #[tokio::test] + async fn test_round_trip_ffi_table_provider() -> Result<()> { + use arrow::datatypes::Field; + use datafusion::arrow::{ + array::Float32Array, datatypes::DataType, record_batch::RecordBatch, + }; + use datafusion::datasource::MemTable; + + let schema = + Arc::new(Schema::new(vec![Field::new("a", DataType::Float32, false)])); + + // define data in two partitions + let batch1 = RecordBatch::try_new( + Arc::clone(&schema), + vec![Arc::new(Float32Array::from(vec![2.0, 4.0, 8.0]))], + )?; + let batch2 = RecordBatch::try_new( + Arc::clone(&schema), + vec![Arc::new(Float32Array::from(vec![64.0]))], + )?; + + let ctx = SessionContext::new(); + + let provider = + Arc::new(MemTable::try_new(schema, vec![vec![batch1], vec![batch2]])?); + + let ffi_provider = FFI_TableProvider::new(provider, true); + + let foreign_table_provider: ForeignTableProvider = (&ffi_provider).into(); + + ctx.register_table("t", Arc::new(foreign_table_provider))?; + + let df = ctx.table("t").await?; + + df.select(vec![col("a")])? + .filter(col("a").gt(lit(3.0)))? 
+ .show() + .await?; + + Ok(()) + } +} diff --git a/datafusion/ffi/src/table_source.rs b/datafusion/ffi/src/table_source.rs new file mode 100644 index 000000000000..a59836622ee6 --- /dev/null +++ b/datafusion/ffi/src/table_source.rs @@ -0,0 +1,87 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use abi_stable::StableAbi; +use datafusion::{datasource::TableType, logical_expr::TableProviderFilterPushDown}; + +/// FFI safe version of [`TableProviderFilterPushDown`]. 
+#[repr(C)] +#[derive(StableAbi)] +#[allow(non_camel_case_types)] +pub enum FFI_TableProviderFilterPushDown { + Unsupported, + Inexact, + Exact, +} + +impl From<&FFI_TableProviderFilterPushDown> for TableProviderFilterPushDown { + fn from(value: &FFI_TableProviderFilterPushDown) -> Self { + match value { + FFI_TableProviderFilterPushDown::Unsupported => { + TableProviderFilterPushDown::Unsupported + } + FFI_TableProviderFilterPushDown::Inexact => { + TableProviderFilterPushDown::Inexact + } + FFI_TableProviderFilterPushDown::Exact => TableProviderFilterPushDown::Exact, + } + } +} + +impl From<&TableProviderFilterPushDown> for FFI_TableProviderFilterPushDown { + fn from(value: &TableProviderFilterPushDown) -> Self { + match value { + TableProviderFilterPushDown::Unsupported => { + FFI_TableProviderFilterPushDown::Unsupported + } + TableProviderFilterPushDown::Inexact => { + FFI_TableProviderFilterPushDown::Inexact + } + TableProviderFilterPushDown::Exact => FFI_TableProviderFilterPushDown::Exact, + } + } +} + +/// FFI safe version of [`TableType`]. 
+#[repr(C)] +#[allow(non_camel_case_types)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, StableAbi)] +pub enum FFI_TableType { + Base, + View, + Temporary, +} + +impl From for TableType { + fn from(value: FFI_TableType) -> Self { + match value { + FFI_TableType::Base => TableType::Base, + FFI_TableType::View => TableType::View, + FFI_TableType::Temporary => TableType::Temporary, + } + } +} + +impl From for FFI_TableType { + fn from(value: TableType) -> Self { + match value { + TableType::Base => FFI_TableType::Base, + TableType::View => FFI_TableType::View, + TableType::Temporary => FFI_TableType::Temporary, + } + } +} diff --git a/datafusion/functions-aggregate-common/src/accumulator.rs b/datafusion/functions-aggregate-common/src/accumulator.rs index ddf0085b9de4..67ada562800b 100644 --- a/datafusion/functions-aggregate-common/src/accumulator.rs +++ b/datafusion/functions-aggregate-common/src/accumulator.rs @@ -18,9 +18,8 @@ use arrow::datatypes::{DataType, Field, Schema}; use datafusion_common::Result; use datafusion_expr_common::accumulator::Accumulator; -use datafusion_physical_expr_common::{ - physical_expr::PhysicalExpr, sort_expr::PhysicalSortExpr, -}; +use datafusion_physical_expr_common::physical_expr::PhysicalExpr; +use datafusion_physical_expr_common::sort_expr::LexOrderingRef; use std::sync::Arc; /// [`AccumulatorArgs`] contains information about how an aggregate @@ -53,7 +52,7 @@ pub struct AccumulatorArgs<'a> { /// ``` /// /// If no `ORDER BY` is specified, `ordering_req` will be empty. 
- pub ordering_req: &'a [PhysicalSortExpr], + pub ordering_req: LexOrderingRef<'a>, /// Whether the aggregation is running in reverse order pub is_reversed: bool, diff --git a/datafusion/functions-aggregate-common/src/utils.rs b/datafusion/functions-aggregate-common/src/utils.rs index 4fba772d8ddc..f55e5ec9a41d 100644 --- a/datafusion/functions-aggregate-common/src/utils.rs +++ b/datafusion/functions-aggregate-common/src/utils.rs @@ -30,7 +30,7 @@ use arrow::{ }; use datafusion_common::{exec_err, DataFusionError, Result}; use datafusion_expr_common::accumulator::Accumulator; -use datafusion_physical_expr_common::sort_expr::PhysicalSortExpr; +use datafusion_physical_expr_common::sort_expr::LexOrderingRef; /// Convert scalar values from an accumulator into arrays. pub fn get_accum_scalar_values_as_arrays( @@ -88,7 +88,7 @@ pub fn adjust_output_array(data_type: &DataType, array: ArrayRef) -> Result Vec { @@ -107,7 +107,7 @@ pub fn ordering_fields( } /// Selects the sort option attribute from all the given `PhysicalSortExpr`s. 
-pub fn get_sort_options(ordering_req: &[PhysicalSortExpr]) -> Vec { +pub fn get_sort_options(ordering_req: LexOrderingRef) -> Vec { ordering_req.iter().map(|item| item.options).collect() } diff --git a/datafusion/functions-aggregate/benches/count.rs b/datafusion/functions-aggregate/benches/count.rs index 65956cb8a1de..1c8266ed5b89 100644 --- a/datafusion/functions-aggregate/benches/count.rs +++ b/datafusion/functions-aggregate/benches/count.rs @@ -23,6 +23,7 @@ use criterion::{black_box, criterion_group, criterion_main, Criterion}; use datafusion_expr::{function::AccumulatorArgs, AggregateUDFImpl, GroupsAccumulator}; use datafusion_functions_aggregate::count::Count; use datafusion_physical_expr::expressions::col; +use datafusion_physical_expr_common::sort_expr::LexOrderingRef; use std::sync::Arc; fn prepare_accumulator() -> Box { @@ -31,7 +32,7 @@ fn prepare_accumulator() -> Box { return_type: &DataType::Int64, schema: &schema, ignore_nulls: false, - ordering_req: &[], + ordering_req: LexOrderingRef::default(), is_reversed: false, name: "COUNT(f)", is_distinct: false, diff --git a/datafusion/functions-aggregate/benches/sum.rs b/datafusion/functions-aggregate/benches/sum.rs index 652d447129dc..1e9493280ed2 100644 --- a/datafusion/functions-aggregate/benches/sum.rs +++ b/datafusion/functions-aggregate/benches/sum.rs @@ -23,6 +23,7 @@ use criterion::{black_box, criterion_group, criterion_main, Criterion}; use datafusion_expr::{function::AccumulatorArgs, AggregateUDFImpl, GroupsAccumulator}; use datafusion_functions_aggregate::sum::Sum; use datafusion_physical_expr::expressions::col; +use datafusion_physical_expr_common::sort_expr::LexOrderingRef; use std::sync::Arc; fn prepare_accumulator(data_type: &DataType) -> Box { @@ -31,7 +32,7 @@ fn prepare_accumulator(data_type: &DataType) -> Box { return_type: data_type, schema: &schema, ignore_nulls: false, - ordering_req: &[], + ordering_req: LexOrderingRef::default(), is_reversed: false, name: "SUM(f)", is_distinct: 
false, diff --git a/datafusion/functions-aggregate/src/array_agg.rs b/datafusion/functions-aggregate/src/array_agg.rs index b3e04c5584ef..7c22c21e38c9 100644 --- a/datafusion/functions-aggregate/src/array_agg.rs +++ b/datafusion/functions-aggregate/src/array_agg.rs @@ -135,7 +135,7 @@ impl AggregateUDFImpl for ArrayAgg { OrderSensitiveArrayAggAccumulator::try_new( &data_type, &ordering_dtypes, - acc_args.ordering_req.to_vec(), + LexOrdering::from_ref(acc_args.ordering_req), acc_args.is_reversed, ) .map(|acc| Box::new(acc) as _) @@ -511,7 +511,7 @@ impl Accumulator for OrderSensitiveArrayAggAccumulator { impl OrderSensitiveArrayAggAccumulator { fn evaluate_orderings(&self) -> Result { - let fields = ordering_fields(&self.ordering_req, &self.datatypes[1..]); + let fields = ordering_fields(self.ordering_req.as_ref(), &self.datatypes[1..]); let num_columns = fields.len(); let struct_field = Fields::from(fields.clone()); diff --git a/datafusion/functions-aggregate/src/first_last.rs b/datafusion/functions-aggregate/src/first_last.rs index da3fc62f8c8c..0b05713499a9 100644 --- a/datafusion/functions-aggregate/src/first_last.rs +++ b/datafusion/functions-aggregate/src/first_last.rs @@ -37,7 +37,7 @@ use datafusion_expr::{ ExprFunctionExt, Signature, SortExpr, TypeSignature, Volatility, }; use datafusion_functions_aggregate_common::utils::get_sort_options; -use datafusion_physical_expr_common::sort_expr::{LexOrdering, PhysicalSortExpr}; +use datafusion_physical_expr_common::sort_expr::{LexOrdering, LexOrderingRef}; create_func!(FirstValue, first_value_udaf); @@ -130,7 +130,7 @@ impl AggregateUDFImpl for FirstValue { FirstValueAccumulator::try_new( acc_args.return_type, &ordering_dtypes, - acc_args.ordering_req.to_vec(), + LexOrdering::from_ref(acc_args.ordering_req), acc_args.ignore_nulls, ) .map(|acc| Box::new(acc.with_requirement_satisfied(requirement_satisfied)) as _) @@ -315,7 +315,7 @@ impl Accumulator for FirstValueAccumulator { if compare_rows( &self.orderings, 
orderings, - &get_sort_options(&self.ordering_req), + &get_sort_options(self.ordering_req.as_ref()), )? .is_gt() { @@ -333,8 +333,10 @@ impl Accumulator for FirstValueAccumulator { let flags = states[is_set_idx].as_boolean(); let filtered_states = filter_states_according_to_is_set(states, flags)?; // 1..is_set_idx range corresponds to ordering section - let sort_cols = - convert_to_sort_cols(&filtered_states[1..is_set_idx], &self.ordering_req); + let sort_cols = convert_to_sort_cols( + &filtered_states[1..is_set_idx], + self.ordering_req.as_ref(), + ); let ordered_states = if sort_cols.is_empty() { // When no ordering is given, use the existing state as is: @@ -347,7 +349,7 @@ impl Accumulator for FirstValueAccumulator { let first_row = get_row_at_idx(&ordered_states, 0)?; // When collecting orderings, we exclude the is_set flag from the state. let first_ordering = &first_row[1..is_set_idx]; - let sort_options = get_sort_options(&self.ordering_req); + let sort_options = get_sort_options(self.ordering_req.as_ref()); // Either there is no existing value, or there is an earlier version in new data. if !self.is_set || compare_rows(&self.orderings, first_ordering, &sort_options)?.is_gt() @@ -453,7 +455,7 @@ impl AggregateUDFImpl for LastValue { LastValueAccumulator::try_new( acc_args.return_type, &ordering_dtypes, - acc_args.ordering_req.to_vec(), + LexOrdering::from_ref(acc_args.ordering_req), acc_args.ignore_nulls, ) .map(|acc| Box::new(acc.with_requirement_satisfied(requirement_satisfied)) as _) @@ -645,7 +647,7 @@ impl Accumulator for LastValueAccumulator { if compare_rows( &self.orderings, orderings, - &get_sort_options(&self.ordering_req), + &get_sort_options(self.ordering_req.as_ref()), )? 
.is_lt() { @@ -663,8 +665,10 @@ impl Accumulator for LastValueAccumulator { let flags = states[is_set_idx].as_boolean(); let filtered_states = filter_states_according_to_is_set(states, flags)?; // 1..is_set_idx range corresponds to ordering section - let sort_cols = - convert_to_sort_cols(&filtered_states[1..is_set_idx], &self.ordering_req); + let sort_cols = convert_to_sort_cols( + &filtered_states[1..is_set_idx], + self.ordering_req.as_ref(), + ); let ordered_states = if sort_cols.is_empty() { // When no ordering is given, use existing state as is: @@ -679,7 +683,7 @@ impl Accumulator for LastValueAccumulator { let last_row = get_row_at_idx(&ordered_states, last_idx)?; // When collecting orderings, we exclude the is_set flag from the state. let last_ordering = &last_row[1..is_set_idx]; - let sort_options = get_sort_options(&self.ordering_req); + let sort_options = get_sort_options(self.ordering_req.as_ref()); // Either there is no existing value, or there is a newer (latest) // version in the new data: if !self.is_set @@ -721,7 +725,7 @@ fn filter_states_according_to_is_set( /// Combines array refs and their corresponding orderings to construct `SortColumn`s. 
fn convert_to_sort_cols( arrs: &[ArrayRef], - sort_exprs: &[PhysicalSortExpr], + sort_exprs: LexOrderingRef, ) -> Vec { arrs.iter() .zip(sort_exprs.iter()) @@ -740,10 +744,18 @@ mod tests { #[test] fn test_first_last_value_value() -> Result<()> { - let mut first_accumulator = - FirstValueAccumulator::try_new(&DataType::Int64, &[], vec![], false)?; - let mut last_accumulator = - LastValueAccumulator::try_new(&DataType::Int64, &[], vec![], false)?; + let mut first_accumulator = FirstValueAccumulator::try_new( + &DataType::Int64, + &[], + LexOrdering::default(), + false, + )?; + let mut last_accumulator = LastValueAccumulator::try_new( + &DataType::Int64, + &[], + LexOrdering::default(), + false, + )?; // first value in the tuple is start of the range (inclusive), // second value in the tuple is end of the range (exclusive) let ranges: Vec<(i64, i64)> = vec![(0, 10), (1, 11), (2, 13)]; @@ -780,14 +792,22 @@ mod tests { .collect::>(); // FirstValueAccumulator - let mut first_accumulator = - FirstValueAccumulator::try_new(&DataType::Int64, &[], vec![], false)?; + let mut first_accumulator = FirstValueAccumulator::try_new( + &DataType::Int64, + &[], + LexOrdering::default(), + false, + )?; first_accumulator.update_batch(&[Arc::clone(&arrs[0])])?; let state1 = first_accumulator.state()?; - let mut first_accumulator = - FirstValueAccumulator::try_new(&DataType::Int64, &[], vec![], false)?; + let mut first_accumulator = FirstValueAccumulator::try_new( + &DataType::Int64, + &[], + LexOrdering::default(), + false, + )?; first_accumulator.update_batch(&[Arc::clone(&arrs[1])])?; let state2 = first_accumulator.state()?; @@ -802,22 +822,34 @@ mod tests { ])?); } - let mut first_accumulator = - FirstValueAccumulator::try_new(&DataType::Int64, &[], vec![], false)?; + let mut first_accumulator = FirstValueAccumulator::try_new( + &DataType::Int64, + &[], + LexOrdering::default(), + false, + )?; first_accumulator.merge_batch(&states)?; let merged_state = first_accumulator.state()?; 
assert_eq!(merged_state.len(), state1.len()); // LastValueAccumulator - let mut last_accumulator = - LastValueAccumulator::try_new(&DataType::Int64, &[], vec![], false)?; + let mut last_accumulator = LastValueAccumulator::try_new( + &DataType::Int64, + &[], + LexOrdering::default(), + false, + )?; last_accumulator.update_batch(&[Arc::clone(&arrs[0])])?; let state1 = last_accumulator.state()?; - let mut last_accumulator = - LastValueAccumulator::try_new(&DataType::Int64, &[], vec![], false)?; + let mut last_accumulator = LastValueAccumulator::try_new( + &DataType::Int64, + &[], + LexOrdering::default(), + false, + )?; last_accumulator.update_batch(&[Arc::clone(&arrs[1])])?; let state2 = last_accumulator.state()?; @@ -832,8 +864,12 @@ mod tests { ])?); } - let mut last_accumulator = - LastValueAccumulator::try_new(&DataType::Int64, &[], vec![], false)?; + let mut last_accumulator = LastValueAccumulator::try_new( + &DataType::Int64, + &[], + LexOrdering::default(), + false, + )?; last_accumulator.merge_batch(&states)?; let merged_state = last_accumulator.state()?; diff --git a/datafusion/functions-aggregate/src/nth_value.rs b/datafusion/functions-aggregate/src/nth_value.rs index 2a1778d8b232..5f3a8cf2f161 100644 --- a/datafusion/functions-aggregate/src/nth_value.rs +++ b/datafusion/functions-aggregate/src/nth_value.rs @@ -133,7 +133,7 @@ impl AggregateUDFImpl for NthValueAgg { n, &data_type, &ordering_dtypes, - acc_args.ordering_req.to_vec(), + LexOrdering::from_ref(acc_args.ordering_req), ) .map(|acc| Box::new(acc) as _) } @@ -403,7 +403,7 @@ impl Accumulator for NthValueAccumulator { impl NthValueAccumulator { fn evaluate_orderings(&self) -> Result { - let fields = ordering_fields(&self.ordering_req, &self.datatypes[1..]); + let fields = ordering_fields(self.ordering_req.as_ref(), &self.datatypes[1..]); let struct_field = Fields::from(fields.clone()); let mut column_wise_ordering_values = vec![]; diff --git a/datafusion/functions-aggregate/src/stddev.rs 
b/datafusion/functions-aggregate/src/stddev.rs index cfbde3bc5c61..dbd3dafc4053 100644 --- a/datafusion/functions-aggregate/src/stddev.rs +++ b/datafusion/functions-aggregate/src/stddev.rs @@ -411,6 +411,7 @@ mod tests { use datafusion_expr::AggregateUDF; use datafusion_functions_aggregate_common::utils::get_accum_scalar_values_as_arrays; use datafusion_physical_expr::expressions::col; + use datafusion_physical_expr_common::sort_expr::LexOrderingRef; use std::sync::Arc; #[test] @@ -462,7 +463,7 @@ mod tests { return_type: &DataType::Float64, schema, ignore_nulls: false, - ordering_req: &[], + ordering_req: LexOrderingRef::default(), name: "a", is_distinct: false, is_reversed: false, @@ -473,7 +474,7 @@ mod tests { return_type: &DataType::Float64, schema, ignore_nulls: false, - ordering_req: &[], + ordering_req: LexOrderingRef::default(), name: "a", is_distinct: false, is_reversed: false, diff --git a/datafusion/functions-nested/benches/map.rs b/datafusion/functions-nested/benches/map.rs index ca23d8b7ff4c..3c4a09c65992 100644 --- a/datafusion/functions-nested/benches/map.rs +++ b/datafusion/functions-nested/benches/map.rs @@ -96,6 +96,7 @@ fn criterion_benchmark(c: &mut Criterion) { b.iter(|| { black_box( + #[allow(deprecated)] // TODO use invoke_batch map_udf() .invoke(&[keys.clone(), values.clone()]) .expect("map should work on valid values"), diff --git a/datafusion/functions-nested/src/resize.rs b/datafusion/functions-nested/src/resize.rs index 294076a52b52..b0255e7be2a3 100644 --- a/datafusion/functions-nested/src/resize.rs +++ b/datafusion/functions-nested/src/resize.rs @@ -19,8 +19,10 @@ use crate::utils::make_scalar_function; use arrow::array::{Capacities, MutableArrayData}; -use arrow_array::{ArrayRef, GenericListArray, Int64Array, OffsetSizeTrait}; -use arrow_buffer::{ArrowNativeType, OffsetBuffer}; +use arrow_array::{ + new_null_array, Array, ArrayRef, GenericListArray, Int64Array, OffsetSizeTrait, +}; +use arrow_buffer::{ArrowNativeType, 
BooleanBufferBuilder, NullBuffer, OffsetBuffer}; use arrow_schema::DataType::{FixedSizeList, LargeList, List}; use arrow_schema::{DataType, FieldRef}; use datafusion_common::cast::{as_int64_array, as_large_list_array, as_list_array}; @@ -134,6 +136,23 @@ pub(crate) fn array_resize_inner(arg: &[ArrayRef]) -> Result { return exec_err!("array_resize needs two or three arguments"); } + let array = &arg[0]; + + // Checks if entire array is null + if array.null_count() == array.len() { + let return_type = match array.data_type() { + List(field) => List(Arc::clone(field)), + LargeList(field) => LargeList(Arc::clone(field)), + _ => { + return exec_err!( + "array_resize does not support type '{:?}'.", + array.data_type() + ) + } + }; + return Ok(new_null_array(&return_type, array.len())); + } + let new_len = as_int64_array(&arg[1])?; let new_element = if arg.len() == 3 { Some(Arc::clone(&arg[2])) @@ -184,7 +203,16 @@ fn general_list_resize>( capacity, ); + let mut null_builder = BooleanBufferBuilder::new(array.len()); + for (row_index, offset_window) in array.offsets().windows(2).enumerate() { + if array.is_null(row_index) { + null_builder.append(false); + offsets.push(offsets[row_index]); + continue; + } + null_builder.append(true); + let count = count_array.value(row_index).to_usize().ok_or_else(|| { internal_datafusion_err!("array_resize: failed to convert size to usize") })?; @@ -211,10 +239,12 @@ fn general_list_resize>( } let data = mutable.freeze(); + let null_bit_buffer: NullBuffer = null_builder.finish().into(); + Ok(Arc::new(GenericListArray::::try_new( Arc::clone(field), OffsetBuffer::::new(offsets.into()), arrow_array::make_array(data), - None, + Some(null_bit_buffer), )?)) } diff --git a/datafusion/functions/benches/character_length.rs b/datafusion/functions/benches/character_length.rs index 17c4dd1f8912..9ba16807de01 100644 --- a/datafusion/functions/benches/character_length.rs +++ b/datafusion/functions/benches/character_length.rs @@ -84,28 +84,48 @@ fn 
criterion_benchmark(c: &mut Criterion) { let args_string_ascii = gen_string_array(n_rows, str_len, 0.1, 0.0, false); c.bench_function( &format!("character_length_StringArray_ascii_str_len_{}", str_len), - |b| b.iter(|| black_box(character_length.invoke(&args_string_ascii))), + |b| { + b.iter(|| { + #[allow(deprecated)] // TODO use invoke_batch + black_box(character_length.invoke(&args_string_ascii)) + }) + }, ); // StringArray UTF8 let args_string_utf8 = gen_string_array(n_rows, str_len, 0.1, 0.5, false); c.bench_function( &format!("character_length_StringArray_utf8_str_len_{}", str_len), - |b| b.iter(|| black_box(character_length.invoke(&args_string_utf8))), + |b| { + b.iter(|| { + #[allow(deprecated)] // TODO use invoke_batch + black_box(character_length.invoke(&args_string_utf8)) + }) + }, ); // StringViewArray ASCII only let args_string_view_ascii = gen_string_array(n_rows, str_len, 0.1, 0.0, true); c.bench_function( &format!("character_length_StringViewArray_ascii_str_len_{}", str_len), - |b| b.iter(|| black_box(character_length.invoke(&args_string_view_ascii))), + |b| { + b.iter(|| { + #[allow(deprecated)] // TODO use invoke_batch + black_box(character_length.invoke(&args_string_view_ascii)) + }) + }, ); // StringViewArray UTF8 let args_string_view_utf8 = gen_string_array(n_rows, str_len, 0.1, 0.5, true); c.bench_function( &format!("character_length_StringViewArray_utf8_str_len_{}", str_len), - |b| b.iter(|| black_box(character_length.invoke(&args_string_view_utf8))), + |b| { + b.iter(|| { + #[allow(deprecated)] // TODO use invoke_batch + black_box(character_length.invoke(&args_string_view_utf8)) + }) + }, ); } } diff --git a/datafusion/functions/benches/concat.rs b/datafusion/functions/benches/concat.rs index 91c46ac775a8..280819778f93 100644 --- a/datafusion/functions/benches/concat.rs +++ b/datafusion/functions/benches/concat.rs @@ -38,7 +38,10 @@ fn criterion_benchmark(c: &mut Criterion) { let args = create_args(size, 32); let mut group = 
c.benchmark_group("concat function"); group.bench_function(BenchmarkId::new("concat", size), |b| { - b.iter(|| criterion::black_box(concat().invoke(&args).unwrap())) + b.iter(|| { + #[allow(deprecated)] // TODO use invoke_batch + criterion::black_box(concat().invoke(&args).unwrap()) + }) }); group.finish(); } diff --git a/datafusion/functions/benches/cot.rs b/datafusion/functions/benches/cot.rs index e655d82dec91..a33f00b4b73e 100644 --- a/datafusion/functions/benches/cot.rs +++ b/datafusion/functions/benches/cot.rs @@ -33,12 +33,18 @@ fn criterion_benchmark(c: &mut Criterion) { let f32_array = Arc::new(create_primitive_array::(size, 0.2)); let f32_args = vec![ColumnarValue::Array(f32_array)]; c.bench_function(&format!("cot f32 array: {}", size), |b| { - b.iter(|| black_box(cot_fn.invoke(&f32_args).unwrap())) + b.iter(|| { + #[allow(deprecated)] // TODO use invoke_batch + black_box(cot_fn.invoke(&f32_args).unwrap()) + }) }); let f64_array = Arc::new(create_primitive_array::(size, 0.2)); let f64_args = vec![ColumnarValue::Array(f64_array)]; c.bench_function(&format!("cot f64 array: {}", size), |b| { - b.iter(|| black_box(cot_fn.invoke(&f64_args).unwrap())) + b.iter(|| { + #[allow(deprecated)] // TODO use invoke_batch + black_box(cot_fn.invoke(&f64_args).unwrap()) + }) }); } } diff --git a/datafusion/functions/benches/date_bin.rs b/datafusion/functions/benches/date_bin.rs index c881947354fd..4a8682c42f94 100644 --- a/datafusion/functions/benches/date_bin.rs +++ b/datafusion/functions/benches/date_bin.rs @@ -45,6 +45,7 @@ fn criterion_benchmark(c: &mut Criterion) { let udf = date_bin(); b.iter(|| { + #[allow(deprecated)] // TODO use invoke_batch black_box( udf.invoke(&[interval.clone(), timestamps.clone()]) .expect("date_bin should work on valid values"), diff --git a/datafusion/functions/benches/encoding.rs b/datafusion/functions/benches/encoding.rs index d49235aac938..0615091e90d4 100644 --- a/datafusion/functions/benches/encoding.rs +++ 
b/datafusion/functions/benches/encoding.rs @@ -29,22 +29,30 @@ fn criterion_benchmark(c: &mut Criterion) { let str_array = Arc::new(create_string_array_with_len::(size, 0.2, 32)); c.bench_function(&format!("base64_decode/{size}"), |b| { let method = ColumnarValue::Scalar("base64".into()); + #[allow(deprecated)] // TODO use invoke_batch let encoded = encoding::encode() .invoke(&[ColumnarValue::Array(str_array.clone()), method.clone()]) .unwrap(); let args = vec![encoded, method]; - b.iter(|| black_box(decode.invoke(&args).unwrap())) + b.iter(|| { + #[allow(deprecated)] // TODO use invoke_batch + black_box(decode.invoke(&args).unwrap()) + }) }); c.bench_function(&format!("hex_decode/{size}"), |b| { let method = ColumnarValue::Scalar("hex".into()); + #[allow(deprecated)] // TODO use invoke_batch let encoded = encoding::encode() .invoke(&[ColumnarValue::Array(str_array.clone()), method.clone()]) .unwrap(); let args = vec![encoded, method]; - b.iter(|| black_box(decode.invoke(&args).unwrap())) + b.iter(|| { + #[allow(deprecated)] // TODO use invoke_batch + black_box(decode.invoke(&args).unwrap()) + }) }); } } diff --git a/datafusion/functions/benches/isnan.rs b/datafusion/functions/benches/isnan.rs index 16bbe073daf0..3e50de658b36 100644 --- a/datafusion/functions/benches/isnan.rs +++ b/datafusion/functions/benches/isnan.rs @@ -32,12 +32,18 @@ fn criterion_benchmark(c: &mut Criterion) { let f32_array = Arc::new(create_primitive_array::(size, 0.2)); let f32_args = vec![ColumnarValue::Array(f32_array)]; c.bench_function(&format!("isnan f32 array: {}", size), |b| { - b.iter(|| black_box(isnan.invoke(&f32_args).unwrap())) + b.iter(|| { + #[allow(deprecated)] // TODO use invoke_batch + black_box(isnan.invoke(&f32_args).unwrap()) + }) }); let f64_array = Arc::new(create_primitive_array::(size, 0.2)); let f64_args = vec![ColumnarValue::Array(f64_array)]; c.bench_function(&format!("isnan f64 array: {}", size), |b| { - b.iter(|| black_box(isnan.invoke(&f64_args).unwrap())) + 
b.iter(|| { + #[allow(deprecated)] // TODO use invoke_batch + black_box(isnan.invoke(&f64_args).unwrap()) + }) }); } } diff --git a/datafusion/functions/benches/iszero.rs b/datafusion/functions/benches/iszero.rs index 3348d172e1f2..3e6ac97063ca 100644 --- a/datafusion/functions/benches/iszero.rs +++ b/datafusion/functions/benches/iszero.rs @@ -32,12 +32,18 @@ fn criterion_benchmark(c: &mut Criterion) { let f32_array = Arc::new(create_primitive_array::(size, 0.2)); let f32_args = vec![ColumnarValue::Array(f32_array)]; c.bench_function(&format!("iszero f32 array: {}", size), |b| { - b.iter(|| black_box(iszero.invoke(&f32_args).unwrap())) + b.iter(|| { + #[allow(deprecated)] // TODO use invoke_batch + black_box(iszero.invoke(&f32_args).unwrap()) + }) }); let f64_array = Arc::new(create_primitive_array::(size, 0.2)); let f64_args = vec![ColumnarValue::Array(f64_array)]; c.bench_function(&format!("iszero f64 array: {}", size), |b| { - b.iter(|| black_box(iszero.invoke(&f64_args).unwrap())) + b.iter(|| { + #[allow(deprecated)] // TODO use invoke_batch + black_box(iszero.invoke(&f64_args).unwrap()) + }) }); } } diff --git a/datafusion/functions/benches/lower.rs b/datafusion/functions/benches/lower.rs index 934c1c6bd189..6cc67791464f 100644 --- a/datafusion/functions/benches/lower.rs +++ b/datafusion/functions/benches/lower.rs @@ -124,19 +124,32 @@ fn criterion_benchmark(c: &mut Criterion) { for size in [1024, 4096, 8192] { let args = create_args1(size, 32); c.bench_function(&format!("lower_all_values_are_ascii: {}", size), |b| { - b.iter(|| black_box(lower.invoke(&args))) + b.iter(|| { + #[allow(deprecated)] // TODO use invoke_batch + black_box(lower.invoke(&args)) + }) }); let args = create_args2(size); c.bench_function( &format!("lower_the_first_value_is_nonascii: {}", size), - |b| b.iter(|| black_box(lower.invoke(&args))), + |b| { + b.iter(|| { + #[allow(deprecated)] // TODO use invoke_batch + black_box(lower.invoke(&args)) + }) + }, ); let args = create_args3(size); 
c.bench_function( &format!("lower_the_middle_value_is_nonascii: {}", size), - |b| b.iter(|| black_box(lower.invoke(&args))), + |b| { + b.iter(|| { + #[allow(deprecated)] // TODO use invoke_batch + black_box(lower.invoke(&args)) + }) + }, ); } @@ -151,24 +164,33 @@ fn criterion_benchmark(c: &mut Criterion) { for &size in &sizes { let args = create_args4(size, str_len, *null_density, mixed); c.bench_function( - &format!("lower_all_values_are_ascii_string_views: size: {}, str_len: {}, null_density: {}, mixed: {}", + &format!("lower_all_values_are_ascii_string_views: size: {}, str_len: {}, null_density: {}, mixed: {}", size, str_len, null_density, mixed), - |b| b.iter(|| black_box(lower.invoke(&args))), - ); + |b| b.iter(|| { + #[allow(deprecated)] // TODO use invoke_batch + black_box(lower.invoke(&args)) + }), + ); let args = create_args4(size, str_len, *null_density, mixed); c.bench_function( - &format!("lower_all_values_are_ascii_string_views: size: {}, str_len: {}, null_density: {}, mixed: {}", + &format!("lower_all_values_are_ascii_string_views: size: {}, str_len: {}, null_density: {}, mixed: {}", size, str_len, null_density, mixed), - |b| b.iter(|| black_box(lower.invoke(&args))), - ); + |b| b.iter(|| { + #[allow(deprecated)] // TODO use invoke_batch + black_box(lower.invoke(&args)) + }), + ); let args = create_args5(size, 0.1, *null_density); c.bench_function( - &format!("lower_some_values_are_nonascii_string_views: size: {}, str_len: {}, non_ascii_density: {}, null_density: {}, mixed: {}", + &format!("lower_some_values_are_nonascii_string_views: size: {}, str_len: {}, non_ascii_density: {}, null_density: {}, mixed: {}", size, str_len, 0.1, null_density, mixed), - |b| b.iter(|| black_box(lower.invoke(&args))), - ); + |b| b.iter(|| { + #[allow(deprecated)] // TODO use invoke_batch + black_box(lower.invoke(&args)) + }), + ); } } } diff --git a/datafusion/functions/benches/ltrim.rs b/datafusion/functions/benches/ltrim.rs index b3fa5ef4fdff..4f94729b6fef 100644 --- 
a/datafusion/functions/benches/ltrim.rs +++ b/datafusion/functions/benches/ltrim.rs @@ -139,7 +139,12 @@ fn run_with_string_type( format!( "{string_type} [size={size}, len_before={len}, len_after={remaining_len}]", ), - |b| b.iter(|| black_box(ltrim.invoke(&args))), + |b| { + b.iter(|| { + #[allow(deprecated)] // TODO use invoke_batch + black_box(ltrim.invoke(&args)) + }) + }, ); } diff --git a/datafusion/functions/benches/make_date.rs b/datafusion/functions/benches/make_date.rs index cb8f1abe6d5d..a9844e4b2541 100644 --- a/datafusion/functions/benches/make_date.rs +++ b/datafusion/functions/benches/make_date.rs @@ -62,6 +62,7 @@ fn criterion_benchmark(c: &mut Criterion) { let days = ColumnarValue::Array(Arc::new(days(&mut rng)) as ArrayRef); b.iter(|| { + #[allow(deprecated)] // TODO use invoke_batch black_box( make_date() .invoke(&[years.clone(), months.clone(), days.clone()]) @@ -77,6 +78,7 @@ fn criterion_benchmark(c: &mut Criterion) { let days = ColumnarValue::Array(Arc::new(days(&mut rng)) as ArrayRef); b.iter(|| { + #[allow(deprecated)] // TODO use invoke_batch black_box( make_date() .invoke(&[year.clone(), months.clone(), days.clone()]) @@ -92,6 +94,7 @@ fn criterion_benchmark(c: &mut Criterion) { let days = ColumnarValue::Array(Arc::new(days(&mut rng)) as ArrayRef); b.iter(|| { + #[allow(deprecated)] // TODO use invoke_batch black_box( make_date() .invoke(&[year.clone(), month.clone(), days.clone()]) @@ -106,6 +109,7 @@ fn criterion_benchmark(c: &mut Criterion) { let day = ColumnarValue::Scalar(ScalarValue::Int32(Some(26))); b.iter(|| { + #[allow(deprecated)] // TODO use invoke_batch black_box( make_date() .invoke(&[year.clone(), month.clone(), day.clone()]) diff --git a/datafusion/functions/benches/nullif.rs b/datafusion/functions/benches/nullif.rs index dfabad335835..6e1154cf182a 100644 --- a/datafusion/functions/benches/nullif.rs +++ b/datafusion/functions/benches/nullif.rs @@ -33,7 +33,10 @@ fn criterion_benchmark(c: &mut Criterion) { 
ColumnarValue::Array(array), ]; c.bench_function(&format!("nullif scalar array: {}", size), |b| { - b.iter(|| black_box(nullif.invoke(&args).unwrap())) + b.iter(|| { + #[allow(deprecated)] // TODO use invoke_batch + black_box(nullif.invoke(&args).unwrap()) + }) }); } } diff --git a/datafusion/functions/benches/pad.rs b/datafusion/functions/benches/pad.rs index 71fa68762c1e..4b21ca373047 100644 --- a/datafusion/functions/benches/pad.rs +++ b/datafusion/functions/benches/pad.rs @@ -101,17 +101,26 @@ fn criterion_benchmark(c: &mut Criterion) { let args = create_args::(size, 32, false); group.bench_function(BenchmarkId::new("utf8 type", size), |b| { - b.iter(|| criterion::black_box(lpad().invoke(&args).unwrap())) + b.iter(|| { + #[allow(deprecated)] // TODO use invoke_batch + criterion::black_box(lpad().invoke(&args).unwrap()) + }) }); let args = create_args::(size, 32, false); group.bench_function(BenchmarkId::new("largeutf8 type", size), |b| { - b.iter(|| criterion::black_box(lpad().invoke(&args).unwrap())) + b.iter(|| { + #[allow(deprecated)] // TODO use invoke_batch + criterion::black_box(lpad().invoke(&args).unwrap()) + }) }); let args = create_args::(size, 32, true); group.bench_function(BenchmarkId::new("stringview type", size), |b| { - b.iter(|| criterion::black_box(lpad().invoke(&args).unwrap())) + b.iter(|| { + #[allow(deprecated)] // TODO use invoke_batch + criterion::black_box(lpad().invoke(&args).unwrap()) + }) }); group.finish(); @@ -120,18 +129,27 @@ fn criterion_benchmark(c: &mut Criterion) { let args = create_args::(size, 32, false); group.bench_function(BenchmarkId::new("utf8 type", size), |b| { - b.iter(|| criterion::black_box(rpad().invoke(&args).unwrap())) + b.iter(|| { + #[allow(deprecated)] // TODO use invoke_batch + criterion::black_box(rpad().invoke(&args).unwrap()) + }) }); let args = create_args::(size, 32, false); group.bench_function(BenchmarkId::new("largeutf8 type", size), |b| { - b.iter(|| 
criterion::black_box(rpad().invoke(&args).unwrap())) + b.iter(|| { + #[allow(deprecated)] // TODO use invoke_batch + criterion::black_box(rpad().invoke(&args).unwrap()) + }) }); // rpad for stringview type let args = create_args::(size, 32, true); group.bench_function(BenchmarkId::new("stringview type", size), |b| { - b.iter(|| criterion::black_box(rpad().invoke(&args).unwrap())) + b.iter(|| { + #[allow(deprecated)] // TODO use invoke_batch + criterion::black_box(rpad().invoke(&args).unwrap()) + }) }); group.finish(); diff --git a/datafusion/functions/benches/repeat.rs b/datafusion/functions/benches/repeat.rs index 5643ccf07133..6e54c92b9b26 100644 --- a/datafusion/functions/benches/repeat.rs +++ b/datafusion/functions/benches/repeat.rs @@ -71,7 +71,12 @@ fn criterion_benchmark(c: &mut Criterion) { "repeat_string_view [size={}, repeat_times={}]", size, repeat_times ), - |b| b.iter(|| black_box(repeat.invoke(&args))), + |b| { + b.iter(|| { + #[allow(deprecated)] // TODO use invoke_batch + black_box(repeat.invoke(&args)) + }) + }, ); let args = create_args::(size, 32, repeat_times, false); @@ -80,7 +85,12 @@ fn criterion_benchmark(c: &mut Criterion) { "repeat_string [size={}, repeat_times={}]", size, repeat_times ), - |b| b.iter(|| black_box(repeat.invoke(&args))), + |b| { + b.iter(|| { + #[allow(deprecated)] // TODO use invoke_batch + black_box(repeat.invoke(&args)) + }) + }, ); let args = create_args::(size, 32, repeat_times, false); @@ -89,7 +99,12 @@ fn criterion_benchmark(c: &mut Criterion) { "repeat_large_string [size={}, repeat_times={}]", size, repeat_times ), - |b| b.iter(|| black_box(repeat.invoke(&args))), + |b| { + b.iter(|| { + #[allow(deprecated)] // TODO use invoke_batch + black_box(repeat.invoke(&args)) + }) + }, ); group.finish(); @@ -107,7 +122,12 @@ fn criterion_benchmark(c: &mut Criterion) { "repeat_string_view [size={}, repeat_times={}]", size, repeat_times ), - |b| b.iter(|| black_box(repeat.invoke(&args))), + |b| { + b.iter(|| { + 
#[allow(deprecated)] // TODO use invoke_batch + black_box(repeat.invoke(&args)) + }) + }, ); let args = create_args::(size, 32, repeat_times, false); @@ -116,7 +136,12 @@ fn criterion_benchmark(c: &mut Criterion) { "repeat_string [size={}, repeat_times={}]", size, repeat_times ), - |b| b.iter(|| black_box(repeat.invoke(&args))), + |b| { + b.iter(|| { + #[allow(deprecated)] // TODO use invoke_batch + black_box(repeat.invoke(&args)) + }) + }, ); let args = create_args::(size, 32, repeat_times, false); @@ -125,7 +150,12 @@ fn criterion_benchmark(c: &mut Criterion) { "repeat_large_string [size={}, repeat_times={}]", size, repeat_times ), - |b| b.iter(|| black_box(repeat.invoke(&args))), + |b| { + b.iter(|| { + #[allow(deprecated)] // TODO use invoke_batch + black_box(repeat.invoke(&args)) + }) + }, ); group.finish(); diff --git a/datafusion/functions/benches/signum.rs b/datafusion/functions/benches/signum.rs index 9f8d8258c823..ea1f5433df4e 100644 --- a/datafusion/functions/benches/signum.rs +++ b/datafusion/functions/benches/signum.rs @@ -32,12 +32,18 @@ fn criterion_benchmark(c: &mut Criterion) { let f32_array = Arc::new(create_primitive_array::(size, 0.2)); let f32_args = vec![ColumnarValue::Array(f32_array)]; c.bench_function(&format!("signum f32 array: {}", size), |b| { - b.iter(|| black_box(signum.invoke(&f32_args).unwrap())) + b.iter(|| { + #[allow(deprecated)] // TODO use invoke_batch + black_box(signum.invoke(&f32_args).unwrap()) + }) }); let f64_array = Arc::new(create_primitive_array::(size, 0.2)); let f64_args = vec![ColumnarValue::Array(f64_array)]; c.bench_function(&format!("signum f64 array: {}", size), |b| { - b.iter(|| black_box(signum.invoke(&f64_args).unwrap())) + b.iter(|| { + #[allow(deprecated)] // TODO use invoke_batch + black_box(signum.invoke(&f64_args).unwrap()) + }) }); } } diff --git a/datafusion/functions/benches/strpos.rs b/datafusion/functions/benches/strpos.rs index c78e69826836..31ca61e34c3a 100644 --- 
a/datafusion/functions/benches/strpos.rs +++ b/datafusion/functions/benches/strpos.rs @@ -112,28 +112,48 @@ fn criterion_benchmark(c: &mut Criterion) { let args_string_ascii = gen_string_array(n_rows, str_len, 0.1, 0.0, false); c.bench_function( &format!("strpos_StringArray_ascii_str_len_{}", str_len), - |b| b.iter(|| black_box(strpos.invoke(&args_string_ascii))), + |b| { + b.iter(|| { + #[allow(deprecated)] // TODO use invoke_batch + black_box(strpos.invoke(&args_string_ascii)) + }) + }, ); // StringArray UTF8 let args_string_utf8 = gen_string_array(n_rows, str_len, 0.1, 0.5, false); c.bench_function( &format!("strpos_StringArray_utf8_str_len_{}", str_len), - |b| b.iter(|| black_box(strpos.invoke(&args_string_utf8))), + |b| { + b.iter(|| { + #[allow(deprecated)] // TODO use invoke_batch + black_box(strpos.invoke(&args_string_utf8)) + }) + }, ); // StringViewArray ASCII only let args_string_view_ascii = gen_string_array(n_rows, str_len, 0.1, 0.0, true); c.bench_function( &format!("strpos_StringViewArray_ascii_str_len_{}", str_len), - |b| b.iter(|| black_box(strpos.invoke(&args_string_view_ascii))), + |b| { + b.iter(|| { + #[allow(deprecated)] // TODO use invoke_batch + black_box(strpos.invoke(&args_string_view_ascii)) + }) + }, ); // StringViewArray UTF8 let args_string_view_utf8 = gen_string_array(n_rows, str_len, 0.1, 0.5, true); c.bench_function( &format!("strpos_StringViewArray_utf8_str_len_{}", str_len), - |b| b.iter(|| black_box(strpos.invoke(&args_string_view_utf8))), + |b| { + b.iter(|| { + #[allow(deprecated)] // TODO use invoke_batch + black_box(strpos.invoke(&args_string_view_utf8)) + }) + }, ); } } diff --git a/datafusion/functions/benches/substr.rs b/datafusion/functions/benches/substr.rs index 90ba75c1e8a5..21020dad31a4 100644 --- a/datafusion/functions/benches/substr.rs +++ b/datafusion/functions/benches/substr.rs @@ -107,19 +107,34 @@ fn criterion_benchmark(c: &mut Criterion) { let args = create_args_without_count::(size, len, true, true); 
group.bench_function( format!("substr_string_view [size={}, strlen={}]", size, len), - |b| b.iter(|| black_box(substr.invoke(&args))), + |b| { + b.iter(|| { + #[allow(deprecated)] // TODO use invoke_batch + black_box(substr.invoke(&args)) + }) + }, ); let args = create_args_without_count::(size, len, false, false); group.bench_function( format!("substr_string [size={}, strlen={}]", size, len), - |b| b.iter(|| black_box(substr.invoke(&args))), + |b| { + b.iter(|| { + #[allow(deprecated)] // TODO use invoke_batch + black_box(substr.invoke(&args)) + }) + }, ); let args = create_args_without_count::(size, len, true, false); group.bench_function( format!("substr_large_string [size={}, strlen={}]", size, len), - |b| b.iter(|| black_box(substr.invoke(&args))), + |b| { + b.iter(|| { + #[allow(deprecated)] // TODO use invoke_batch + black_box(substr.invoke(&args)) + }) + }, ); group.finish(); @@ -137,7 +152,12 @@ fn criterion_benchmark(c: &mut Criterion) { "substr_string_view [size={}, count={}, strlen={}]", size, count, len, ), - |b| b.iter(|| black_box(substr.invoke(&args))), + |b| { + b.iter(|| { + #[allow(deprecated)] // TODO use invoke_batch + black_box(substr.invoke(&args)) + }) + }, ); let args = create_args_with_count::(size, len, count, false); @@ -146,7 +166,12 @@ fn criterion_benchmark(c: &mut Criterion) { "substr_string [size={}, count={}, strlen={}]", size, count, len, ), - |b| b.iter(|| black_box(substr.invoke(&args))), + |b| { + b.iter(|| { + #[allow(deprecated)] // TODO use invoke_batch + black_box(substr.invoke(&args)) + }) + }, ); let args = create_args_with_count::(size, len, count, false); @@ -155,7 +180,12 @@ fn criterion_benchmark(c: &mut Criterion) { "substr_large_string [size={}, count={}, strlen={}]", size, count, len, ), - |b| b.iter(|| black_box(substr.invoke(&args))), + |b| { + b.iter(|| { + #[allow(deprecated)] // TODO use invoke_batch + black_box(substr.invoke(&args)) + }) + }, ); group.finish(); @@ -173,7 +203,12 @@ fn criterion_benchmark(c: 
&mut Criterion) { "substr_string_view [size={}, count={}, strlen={}]", size, count, len, ), - |b| b.iter(|| black_box(substr.invoke(&args))), + |b| { + b.iter(|| { + #[allow(deprecated)] // TODO use invoke_batch + black_box(substr.invoke(&args)) + }) + }, ); let args = create_args_with_count::(size, len, count, false); @@ -182,7 +217,12 @@ fn criterion_benchmark(c: &mut Criterion) { "substr_string [size={}, count={}, strlen={}]", size, count, len, ), - |b| b.iter(|| black_box(substr.invoke(&args))), + |b| { + b.iter(|| { + #[allow(deprecated)] // TODO use invoke_batch + black_box(substr.invoke(&args)) + }) + }, ); let args = create_args_with_count::(size, len, count, false); @@ -191,7 +231,12 @@ fn criterion_benchmark(c: &mut Criterion) { "substr_large_string [size={}, count={}, strlen={}]", size, count, len, ), - |b| b.iter(|| black_box(substr.invoke(&args))), + |b| { + b.iter(|| { + #[allow(deprecated)] // TODO use invoke_batch + black_box(substr.invoke(&args)) + }) + }, ); group.finish(); diff --git a/datafusion/functions/benches/substr_index.rs b/datafusion/functions/benches/substr_index.rs index bb9a5b809eee..1e793cf4db8c 100644 --- a/datafusion/functions/benches/substr_index.rs +++ b/datafusion/functions/benches/substr_index.rs @@ -90,6 +90,7 @@ fn criterion_benchmark(c: &mut Criterion) { let args = [strings, delimiters, counts]; b.iter(|| { + #[allow(deprecated)] // TODO use invoke_batch black_box( substr_index() .invoke(&args) diff --git a/datafusion/functions/benches/to_char.rs b/datafusion/functions/benches/to_char.rs index d9a153e64abc..09032fdf2de1 100644 --- a/datafusion/functions/benches/to_char.rs +++ b/datafusion/functions/benches/to_char.rs @@ -86,6 +86,7 @@ fn criterion_benchmark(c: &mut Criterion) { let patterns = ColumnarValue::Array(Arc::new(patterns(&mut rng)) as ArrayRef); b.iter(|| { + #[allow(deprecated)] // TODO use invoke_batch black_box( to_char() .invoke(&[data.clone(), patterns.clone()]) @@ -101,6 +102,7 @@ fn criterion_benchmark(c: 
&mut Criterion) { ColumnarValue::Scalar(ScalarValue::Utf8(Some("%Y-%m-%d".to_string()))); b.iter(|| { + #[allow(deprecated)] // TODO use invoke_batch black_box( to_char() .invoke(&[data.clone(), patterns.clone()]) @@ -124,6 +126,7 @@ fn criterion_benchmark(c: &mut Criterion) { ))); b.iter(|| { + #[allow(deprecated)] // TODO use invoke_batch black_box( to_char() .invoke(&[data.clone(), pattern.clone()]) diff --git a/datafusion/functions/benches/to_timestamp.rs b/datafusion/functions/benches/to_timestamp.rs index 5a87b34caf47..11816fe9c64f 100644 --- a/datafusion/functions/benches/to_timestamp.rs +++ b/datafusion/functions/benches/to_timestamp.rs @@ -113,6 +113,7 @@ fn criterion_benchmark(c: &mut Criterion) { let string_array = ColumnarValue::Array(Arc::new(data()) as ArrayRef); b.iter(|| { + #[allow(deprecated)] // TODO use invoke_batch black_box( to_timestamp() .invoke(&[string_array.clone()]) @@ -126,6 +127,7 @@ fn criterion_benchmark(c: &mut Criterion) { let string_array = ColumnarValue::Array(Arc::new(data) as ArrayRef); b.iter(|| { + #[allow(deprecated)] // TODO use invoke_batch black_box( to_timestamp() .invoke(&[string_array.clone()]) @@ -139,6 +141,7 @@ fn criterion_benchmark(c: &mut Criterion) { let string_array = ColumnarValue::Array(Arc::new(data) as ArrayRef); b.iter(|| { + #[allow(deprecated)] // TODO use invoke_batch black_box( to_timestamp() .invoke(&[string_array.clone()]) @@ -157,6 +160,7 @@ fn criterion_benchmark(c: &mut Criterion) { ColumnarValue::Array(Arc::new(format3) as ArrayRef), ]; b.iter(|| { + #[allow(deprecated)] // TODO use invoke_batch black_box( to_timestamp() .invoke(&args.clone()) @@ -183,6 +187,7 @@ fn criterion_benchmark(c: &mut Criterion) { ), ]; b.iter(|| { + #[allow(deprecated)] // TODO use invoke_batch black_box( to_timestamp() .invoke(&args.clone()) @@ -209,6 +214,7 @@ fn criterion_benchmark(c: &mut Criterion) { ), ]; b.iter(|| { + #[allow(deprecated)] // TODO use invoke_batch black_box( to_timestamp() .invoke(&args.clone()) 
diff --git a/datafusion/functions/benches/trunc.rs b/datafusion/functions/benches/trunc.rs index 92a08abf3d32..07ce522eb913 100644 --- a/datafusion/functions/benches/trunc.rs +++ b/datafusion/functions/benches/trunc.rs @@ -33,12 +33,18 @@ fn criterion_benchmark(c: &mut Criterion) { let f32_array = Arc::new(create_primitive_array::(size, 0.2)); let f32_args = vec![ColumnarValue::Array(f32_array)]; c.bench_function(&format!("trunc f32 array: {}", size), |b| { - b.iter(|| black_box(trunc.invoke(&f32_args).unwrap())) + b.iter(|| { + #[allow(deprecated)] // TODO use invoke_batch + black_box(trunc.invoke(&f32_args).unwrap()) + }) }); let f64_array = Arc::new(create_primitive_array::(size, 0.2)); let f64_args = vec![ColumnarValue::Array(f64_array)]; c.bench_function(&format!("trunc f64 array: {}", size), |b| { - b.iter(|| black_box(trunc.invoke(&f64_args).unwrap())) + b.iter(|| { + #[allow(deprecated)] // TODO use invoke_batch + black_box(trunc.invoke(&f64_args).unwrap()) + }) }); } } diff --git a/datafusion/functions/benches/upper.rs b/datafusion/functions/benches/upper.rs index a3e5fbd7a433..ac4ecacff941 100644 --- a/datafusion/functions/benches/upper.rs +++ b/datafusion/functions/benches/upper.rs @@ -37,7 +37,10 @@ fn criterion_benchmark(c: &mut Criterion) { for size in [1024, 4096, 8192] { let args = create_args(size, 32); c.bench_function("upper_all_values_are_ascii", |b| { - b.iter(|| black_box(upper.invoke(&args))) + b.iter(|| { + #[allow(deprecated)] // TODO use invoke_batch + black_box(upper.invoke(&args)) + }) }); } } diff --git a/datafusion/functions/src/core/version.rs b/datafusion/functions/src/core/version.rs index f726122c649a..e7ac749ddddc 100644 --- a/datafusion/functions/src/core/version.rs +++ b/datafusion/functions/src/core/version.rs @@ -118,7 +118,7 @@ mod test { #[tokio::test] async fn test_version_udf() { let version_udf = ScalarUDF::from(VersionFunc::new()); - let version = version_udf.invoke_no_args(0).unwrap(); + let version = 
version_udf.invoke_batch(&[], 1).unwrap(); if let ColumnarValue::Scalar(ScalarValue::Utf8(Some(version))) = version { assert!(version.starts_with("Apache DataFusion")); diff --git a/datafusion/functions/src/datetime/date_bin.rs b/datafusion/functions/src/datetime/date_bin.rs index e335c4e097f7..e8d065df8633 100644 --- a/datafusion/functions/src/datetime/date_bin.rs +++ b/datafusion/functions/src/datetime/date_bin.rs @@ -240,7 +240,7 @@ fn date_bin_nanos_interval(stride_nanos: i64, source: i64, origin: i64) -> i64 { fn compute_distance(time_diff: i64, stride: i64) -> i64 { let time_delta = time_diff - (time_diff % stride); - if time_diff < 0 && stride > 1 { + if time_diff < 0 && stride > 1 && time_delta != time_diff { // The origin is later than the source timestamp, round down to the previous bin time_delta - stride } else { @@ -864,4 +864,32 @@ mod tests { assert_eq!(result, expected1, "{source} = {expected}"); }) } + + #[test] + fn test_date_bin_before_epoch() { + let cases = [ + ( + (TimeDelta::try_minutes(15), "1969-12-31T23:44:59.999999999"), + "1969-12-31T23:30:00", + ), + ( + (TimeDelta::try_minutes(15), "1969-12-31T23:45:00"), + "1969-12-31T23:45:00", + ), + ( + (TimeDelta::try_minutes(15), "1969-12-31T23:45:00.000000001"), + "1969-12-31T23:45:00", + ), + ]; + + cases.iter().for_each(|((stride, source), expected)| { + let stride = stride.unwrap(); + let stride1 = stride.num_nanoseconds().unwrap(); + let source1 = string_to_timestamp_nanos(source).unwrap(); + + let expected1 = string_to_timestamp_nanos(expected).unwrap(); + let result = date_bin_nanos_interval(stride1, source1, 0); + assert_eq!(result, expected1, "{source} = {expected}"); + }) + } } diff --git a/datafusion/optimizer/src/analyzer/subquery.rs b/datafusion/optimizer/src/analyzer/subquery.rs index 0ffc954388f5..fa04835f0967 100644 --- a/datafusion/optimizer/src/analyzer/subquery.rs +++ b/datafusion/optimizer/src/analyzer/subquery.rs @@ -181,7 +181,10 @@ fn check_inner_plan(inner_plan: 
&LogicalPlan, can_contain_outer_ref: bool) -> Re })?; Ok(()) } - JoinType::Left | JoinType::LeftSemi | JoinType::LeftAnti => { + JoinType::Left + | JoinType::LeftSemi + | JoinType::LeftAnti + | JoinType::LeftMark => { check_inner_plan(left, can_contain_outer_ref)?; check_inner_plan(right, false) } diff --git a/datafusion/optimizer/src/analyzer/type_coercion.rs b/datafusion/optimizer/src/analyzer/type_coercion.rs index 5d33b58a0241..9793c4c5490f 100644 --- a/datafusion/optimizer/src/analyzer/type_coercion.rs +++ b/datafusion/optimizer/src/analyzer/type_coercion.rs @@ -688,6 +688,21 @@ fn coerce_frame_bound( } } +fn extract_window_frame_target_type(col_type: &DataType) -> Result { + if col_type.is_numeric() + || is_utf8_or_large_utf8(col_type) + || matches!(col_type, DataType::Null) + { + Ok(col_type.clone()) + } else if is_datetime(col_type) { + Ok(DataType::Interval(IntervalUnit::MonthDayNano)) + } else if let DataType::Dictionary(_, value_type) = col_type { + extract_window_frame_target_type(value_type) + } else { + return internal_err!("Cannot run range queries on datatype: {col_type:?}"); + } +} + // Coerces the given `window_frame` to use appropriate natural types. // For example, ROWS and GROUPS frames use `UInt64` during calculations. fn coerce_window_frame( @@ -703,18 +718,7 @@ fn coerce_window_frame( .map(|s| s.expr.get_type(schema)) .transpose()?; if let Some(col_type) = current_types { - if col_type.is_numeric() - || is_utf8_or_large_utf8(&col_type) - || matches!(col_type, DataType::Null) - { - col_type - } else if is_datetime(&col_type) { - DataType::Interval(IntervalUnit::MonthDayNano) - } else { - return internal_err!( - "Cannot run range queries on datatype: {col_type:?}" - ); - } + extract_window_frame_target_type(&col_type)? 
} else { return internal_err!("ORDER BY column cannot be empty"); } diff --git a/datafusion/optimizer/src/common_subexpr_eliminate.rs b/datafusion/optimizer/src/common_subexpr_eliminate.rs index ee9ae9fb15a7..4fe22d252744 100644 --- a/datafusion/optimizer/src/common_subexpr_eliminate.rs +++ b/datafusion/optimizer/src/common_subexpr_eliminate.rs @@ -553,7 +553,8 @@ impl OptimizerRule for CommonSubexprEliminate { | LogicalPlan::Copy(_) | LogicalPlan::Unnest(_) | LogicalPlan::RecursiveQuery(_) - | LogicalPlan::Prepare(_) => { + | LogicalPlan::Prepare(_) + | LogicalPlan::Execute(_) => { // This rule handles recursion itself in a `ApplyOrder::TopDown` like // manner. plan.map_children(|c| self.rewrite(c, config))? diff --git a/datafusion/optimizer/src/decorrelate_predicate_subquery.rs b/datafusion/optimizer/src/decorrelate_predicate_subquery.rs index cc1687cffe92..7fdad5ba4b6e 100644 --- a/datafusion/optimizer/src/decorrelate_predicate_subquery.rs +++ b/datafusion/optimizer/src/decorrelate_predicate_subquery.rs @@ -17,7 +17,6 @@ //! [`DecorrelatePredicateSubquery`] converts `IN`/`EXISTS` subquery predicates to `SEMI`/`ANTI` joins use std::collections::BTreeSet; -use std::iter; use std::ops::Deref; use std::sync::Arc; @@ -34,11 +33,10 @@ use datafusion_expr::expr_rewriter::create_col_from_scalar_expr; use datafusion_expr::logical_plan::{JoinType, Subquery}; use datafusion_expr::utils::{conjunction, split_conjunction_owned}; use datafusion_expr::{ - exists, in_subquery, lit, not, not_exists, not_in_subquery, BinaryExpr, Expr, Filter, + exists, in_subquery, not, not_exists, not_in_subquery, BinaryExpr, Expr, Filter, LogicalPlan, LogicalPlanBuilder, Operator, }; -use itertools::chain; use log::debug; /// Optimizer rule for rewriting predicate(IN/EXISTS) subquery to left semi/anti joins @@ -138,17 +136,14 @@ fn rewrite_inner_subqueries( Expr::Exists(Exists { subquery: Subquery { subquery, .. 
}, negated, - }) => { - match existence_join(&cur_input, Arc::clone(&subquery), None, negated, alias)? - { - Some((plan, exists_expr)) => { - cur_input = plan; - Ok(Transformed::yes(exists_expr)) - } - None if negated => Ok(Transformed::no(not_exists(subquery))), - None => Ok(Transformed::no(exists(subquery))), + }) => match mark_join(&cur_input, Arc::clone(&subquery), None, negated, alias)? { + Some((plan, exists_expr)) => { + cur_input = plan; + Ok(Transformed::yes(exists_expr)) } - } + None if negated => Ok(Transformed::no(not_exists(subquery))), + None => Ok(Transformed::no(exists(subquery))), + }, Expr::InSubquery(InSubquery { expr, subquery: Subquery { subquery, .. }, @@ -159,7 +154,7 @@ fn rewrite_inner_subqueries( .map_or(plan_err!("single expression required."), |output_expr| { Ok(Expr::eq(*expr.clone(), output_expr)) })?; - match existence_join( + match mark_join( &cur_input, Arc::clone(&subquery), Some(in_predicate), @@ -283,10 +278,6 @@ fn build_join_top( build_join(left, subquery, in_predicate_opt, join_type, subquery_alias) } -/// Existence join is emulated by adding a non-nullable column to the subquery and using a left join -/// and checking if the column is null or not. If native support is added for Existence/Mark then -/// we should use that instead. -/// /// This is used to handle the case when the subquery is embedded in a more complex boolean /// expression like and OR. 
For example /// @@ -296,37 +287,26 @@ fn build_join_top( /// /// ```text /// Projection: t1.id -/// Filter: t1.id < 0 OR __correlated_sq_1.__exists IS NOT NULL -/// Left Join: Filter: t1.id = __correlated_sq_1.id +/// Filter: t1.id < 0 OR __correlated_sq_1.mark +/// LeftMark Join: Filter: t1.id = __correlated_sq_1.id /// TableScan: t1 /// SubqueryAlias: __correlated_sq_1 -/// Projection: t2.id, true as __exists +/// Projection: t2.id /// TableScan: t2 -fn existence_join( +fn mark_join( left: &LogicalPlan, subquery: Arc, in_predicate_opt: Option, negated: bool, alias_generator: &Arc, ) -> Result> { - // Add non nullable column to emulate existence join - let always_true_expr = lit(true).alias("__exists"); - let cols = chain( - subquery.schema().columns().into_iter().map(Expr::Column), - iter::once(always_true_expr), - ); - let subquery = LogicalPlanBuilder::from(subquery).project(cols)?.build()?; let alias = alias_generator.next("__correlated_sq"); - let exists_col = Expr::Column(Column::new(Some(alias.clone()), "__exists")); - let exists_expr = if negated { - exists_col.is_null() - } else { - exists_col.is_not_null() - }; + let exists_col = Expr::Column(Column::new(Some(alias.clone()), "mark")); + let exists_expr = if negated { !exists_col } else { exists_col }; Ok( - build_join(left, &subquery, in_predicate_opt, JoinType::Left, alias)? + build_join(left, &subquery, in_predicate_opt, JoinType::LeftMark, alias)? 
.map(|plan| (plan, exists_expr)), ) } diff --git a/datafusion/optimizer/src/optimize_projections/mod.rs b/datafusion/optimizer/src/optimize_projections/mod.rs index 42eff7100fbe..ec2225bbc042 100644 --- a/datafusion/optimizer/src/optimize_projections/mod.rs +++ b/datafusion/optimizer/src/optimize_projections/mod.rs @@ -348,7 +348,8 @@ fn optimize_projections( | LogicalPlan::RecursiveQuery(_) | LogicalPlan::Statement(_) | LogicalPlan::Values(_) - | LogicalPlan::DescribeTable(_) => { + | LogicalPlan::DescribeTable(_) + | LogicalPlan::Execute(_) => { // These operators have no inputs, so stop the optimization process. return Ok(Transformed::no(plan)); } @@ -677,7 +678,11 @@ fn split_join_requirements( ) -> (RequiredIndicies, RequiredIndicies) { match join_type { // In these cases requirements are split between left/right children: - JoinType::Inner | JoinType::Left | JoinType::Right | JoinType::Full => { + JoinType::Inner + | JoinType::Left + | JoinType::Right + | JoinType::Full + | JoinType::LeftMark => { // Decrease right side indices by `left_len` so that they point to valid // positions within the right child: indices.split_off(left_len) diff --git a/datafusion/optimizer/src/push_down_filter.rs b/datafusion/optimizer/src/push_down_filter.rs index a0262d7d95df..acb7ba0fa757 100644 --- a/datafusion/optimizer/src/push_down_filter.rs +++ b/datafusion/optimizer/src/push_down_filter.rs @@ -161,7 +161,7 @@ pub(crate) fn lr_is_preserved(join_type: JoinType) -> (bool, bool) { JoinType::Full => (false, false), // No columns from the right side of the join can be referenced in output // predicates for semi/anti joins, so whether we specify t/f doesn't matter. - JoinType::LeftSemi | JoinType::LeftAnti => (true, false), + JoinType::LeftSemi | JoinType::LeftAnti | JoinType::LeftMark => (true, false), // No columns from the left side of the join can be referenced in output // predicates for semi/anti joins, so whether we specify t/f doesn't matter. 
JoinType::RightSemi | JoinType::RightAnti => (false, true), @@ -186,6 +186,7 @@ pub(crate) fn on_lr_is_preserved(join_type: JoinType) -> (bool, bool) { JoinType::LeftSemi | JoinType::RightSemi => (true, true), JoinType::LeftAnti => (false, true), JoinType::RightAnti => (true, false), + JoinType::LeftMark => (false, true), } } @@ -677,11 +678,13 @@ fn infer_join_predicates_from_on_filters( on_filters, inferred_predicates, ), - JoinType::Left | JoinType::LeftSemi => infer_join_predicates_impl::( - join_col_keys, - on_filters, - inferred_predicates, - ), + JoinType::Left | JoinType::LeftSemi | JoinType::LeftMark => { + infer_join_predicates_impl::( + join_col_keys, + on_filters, + inferred_predicates, + ) + } JoinType::Right | JoinType::RightSemi => { infer_join_predicates_impl::( join_col_keys, @@ -1142,7 +1145,7 @@ fn rewrite_projection( (qualified_name(qualifier, field.name()), expr) }) - .partition(|(_, value)| value.is_volatile().unwrap_or(true)); + .partition(|(_, value)| value.is_volatile()); let mut push_predicates = vec![]; let mut keep_predicates = vec![]; diff --git a/datafusion/optimizer/src/push_down_limit.rs b/datafusion/optimizer/src/push_down_limit.rs index ec7a0a1364b6..8a3aa4bb8459 100644 --- a/datafusion/optimizer/src/push_down_limit.rs +++ b/datafusion/optimizer/src/push_down_limit.rs @@ -248,7 +248,7 @@ fn push_down_join(mut join: Join, limit: usize) -> Transformed { let (left_limit, right_limit) = if is_no_join_condition(&join) { match join.join_type { Left | Right | Full | Inner => (Some(limit), Some(limit)), - LeftAnti | LeftSemi => (Some(limit), None), + LeftAnti | LeftSemi | LeftMark => (Some(limit), None), RightAnti | RightSemi => (None, Some(limit)), } } else { diff --git a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs index ce6734616b80..40be1f85391d 100644 --- a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs +++ 
b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs @@ -862,8 +862,8 @@ impl<'a, S: SimplifyInfo> TreeNodeRewriter for Simplifier<'a, S> { right, }) if has_common_conjunction(&left, &right) => { let lhs: IndexSet = iter_conjunction_owned(*left).collect(); - let (common, rhs): (Vec<_>, Vec<_>) = - iter_conjunction_owned(*right).partition(|e| lhs.contains(e)); + let (common, rhs): (Vec<_>, Vec<_>) = iter_conjunction_owned(*right) + .partition(|e| lhs.contains(e) && !e.is_volatile()); let new_rhs = rhs.into_iter().reduce(and); let new_lhs = lhs.into_iter().filter(|e| !common.contains(e)).reduce(and); @@ -1682,8 +1682,8 @@ impl<'a, S: SimplifyInfo> TreeNodeRewriter for Simplifier<'a, S> { } fn has_common_conjunction(lhs: &Expr, rhs: &Expr) -> bool { - let lhs: HashSet<&Expr> = iter_conjunction(lhs).collect(); - iter_conjunction(rhs).any(|e| lhs.contains(&e)) + let lhs_set: HashSet<&Expr> = iter_conjunction(lhs).collect(); + iter_conjunction(rhs).any(|e| lhs_set.contains(&e) && !e.is_volatile()) } // TODO: We might not need this after defer pattern for Box is stabilized. 
https://github.com/rust-lang/rust/issues/87121 @@ -3978,4 +3978,69 @@ mod tests { unimplemented!("not needed for tests") } } + #[derive(Debug)] + struct VolatileUdf { + signature: Signature, + } + + impl VolatileUdf { + pub fn new() -> Self { + Self { + signature: Signature::exact(vec![], Volatility::Volatile), + } + } + } + impl ScalarUDFImpl for VolatileUdf { + fn as_any(&self) -> &dyn std::any::Any { + self + } + + fn name(&self) -> &str { + "VolatileUdf" + } + + fn signature(&self) -> &Signature { + &self.signature + } + + fn return_type(&self, _arg_types: &[DataType]) -> Result { + Ok(DataType::Int16) + } + } + #[test] + fn test_optimize_volatile_conditions() { + let fun = Arc::new(ScalarUDF::new_from_impl(VolatileUdf::new())); + let rand = Expr::ScalarFunction(ScalarFunction::new_udf(fun, vec![])); + { + let expr = rand + .clone() + .eq(lit(0)) + .or(col("column1").eq(lit(2)).and(rand.clone().eq(lit(0)))); + + assert_eq!(simplify(expr.clone()), expr); + } + + { + let expr = col("column1") + .eq(lit(2)) + .or(col("column1").eq(lit(2)).and(rand.clone().eq(lit(0)))); + + assert_eq!(simplify(expr), col("column1").eq(lit(2))); + } + + { + let expr = (col("column1").eq(lit(2)).and(rand.clone().eq(lit(0)))).or(col( + "column1", + ) + .eq(lit(2)) + .and(rand.clone().eq(lit(0)))); + + assert_eq!( + simplify(expr), + col("column1") + .eq(lit(2)) + .and((rand.clone().eq(lit(0))).or(rand.clone().eq(lit(0)))) + ); + } + } } diff --git a/datafusion/optimizer/src/simplify_expressions/utils.rs b/datafusion/optimizer/src/simplify_expressions/utils.rs index 38bfc1a93403..c30c3631c193 100644 --- a/datafusion/optimizer/src/simplify_expressions/utils.rs +++ b/datafusion/optimizer/src/simplify_expressions/utils.rs @@ -67,16 +67,21 @@ pub static POWS_OF_TEN: [i128; 38] = [ /// returns true if `needle` is found in a chain of search_op /// expressions. 
Such as: (A AND B) AND C -pub fn expr_contains(expr: &Expr, needle: &Expr, search_op: Operator) -> bool { +fn expr_contains_inner(expr: &Expr, needle: &Expr, search_op: Operator) -> bool { match expr { Expr::BinaryExpr(BinaryExpr { left, op, right }) if *op == search_op => { - expr_contains(left, needle, search_op) - || expr_contains(right, needle, search_op) + expr_contains_inner(left, needle, search_op) + || expr_contains_inner(right, needle, search_op) } _ => expr == needle, } } +/// check volatile calls and return if expr contains needle +pub fn expr_contains(expr: &Expr, needle: &Expr, search_op: Operator) -> bool { + expr_contains_inner(expr, needle, search_op) && !needle.is_volatile() +} + /// Deletes all 'needles' or remains one 'needle' that are found in a chain of xor /// expressions. Such as: A ^ (A ^ (B ^ A)) pub fn delete_xor_in_complex_expr(expr: &Expr, needle: &Expr, is_left: bool) -> Expr { @@ -206,7 +211,7 @@ pub fn is_false(expr: &Expr) -> bool { /// returns true if `haystack` looks like (needle OP X) or (X OP needle) pub fn is_op_with(target_op: Operator, haystack: &Expr, needle: &Expr) -> bool { - matches!(haystack, Expr::BinaryExpr(BinaryExpr { left, op, right }) if op == &target_op && (needle == left.as_ref() || needle == right.as_ref())) + matches!(haystack, Expr::BinaryExpr(BinaryExpr { left, op, right }) if op == &target_op && (needle == left.as_ref() || needle == right.as_ref()) && !needle.is_volatile()) } /// returns true if `not_expr` is !`expr` (not) diff --git a/datafusion/physical-expr-common/src/sort_expr.rs b/datafusion/physical-expr-common/src/sort_expr.rs index d825bfe7e264..addf2fbfca0c 100644 --- a/datafusion/physical-expr-common/src/sort_expr.rs +++ b/datafusion/physical-expr-common/src/sort_expr.rs @@ -17,12 +17,13 @@ //! 
Sort expressions +use crate::physical_expr::PhysicalExpr; +use std::fmt; use std::fmt::{Display, Formatter}; use std::hash::{Hash, Hasher}; -use std::ops::Deref; +use std::ops::{Deref, Index, Range, RangeFrom, RangeTo}; use std::sync::Arc; - -use crate::physical_expr::PhysicalExpr; +use std::vec::IntoIter; use arrow::compute::kernels::sort::{SortColumn, SortOptions}; use arrow::datatypes::Schema; @@ -143,7 +144,7 @@ impl Hash for PhysicalSortExpr { } impl Display for PhysicalSortExpr { - fn fmt(&self, f: &mut Formatter) -> std::fmt::Result { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { write!(f, "{} {}", self.expr, to_str(&self.options)) } } @@ -183,26 +184,6 @@ impl PhysicalSortExpr { .map_or(true, |opts| self.options.descending == opts.descending) } } - - /// Returns a [`Display`]able list of `PhysicalSortExpr`. - pub fn format_list(input: &[PhysicalSortExpr]) -> impl Display + '_ { - struct DisplayableList<'a>(&'a [PhysicalSortExpr]); - impl<'a> Display for DisplayableList<'a> { - fn fmt(&self, f: &mut Formatter) -> std::fmt::Result { - let mut first = true; - for sort_expr in self.0 { - if first { - first = false; - } else { - write!(f, ",")?; - } - write!(f, "{}", sort_expr)?; - } - Ok(()) - } - } - DisplayableList(input) - } } /// Represents sort requirement associated with a plan @@ -260,7 +241,7 @@ impl PartialEq for PhysicalSortRequirement { } impl Display for PhysicalSortRequirement { - fn fmt(&self, f: &mut Formatter) -> std::fmt::Result { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { let opts_string = self.options.as_ref().map_or("NA", to_str); write!(f, "{} {}", self.expr, opts_string) } @@ -274,7 +255,7 @@ pub fn format_physical_sort_requirement_list( ) -> impl Display + '_ { struct DisplayWrapper<'a>(&'a [PhysicalSortRequirement]); impl<'a> Display for DisplayWrapper<'a> { - fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { let mut iter = self.0.iter(); write!(f, "[")?; if 
let Some(expr) = iter.next() { @@ -345,7 +326,7 @@ impl PhysicalSortRequirement { /// default ordering `ASC, NULLS LAST` if given (see the `PhysicalSortExpr::from`). pub fn to_sort_exprs( requirements: impl IntoIterator, - ) -> Vec { + ) -> LexOrdering { requirements .into_iter() .map(PhysicalSortExpr::from) @@ -364,9 +345,147 @@ fn to_str(options: &SortOptions) -> &str { } } -///`LexOrdering` is an alias for the type `Vec`, which represents +///`LexOrdering` contains a `Vec`, which represents /// a lexicographical ordering. -pub type LexOrdering = Vec; +#[derive(Debug, Default, Clone, PartialEq, Eq, Hash)] +pub struct LexOrdering { + pub inner: Vec, +} + +impl LexOrdering { + // Creates a new [`LexOrdering`] from a vector + pub fn new(inner: Vec) -> Self { + Self { inner } + } + + pub fn as_ref(&self) -> LexOrderingRef { + &self.inner + } + + pub fn capacity(&self) -> usize { + self.inner.capacity() + } + + pub fn clear(&mut self) { + self.inner.clear() + } + + pub fn contains(&self, expr: &PhysicalSortExpr) -> bool { + self.inner.contains(expr) + } + + pub fn extend>(&mut self, iter: I) { + self.inner.extend(iter) + } + + pub fn from_ref(lex_ordering_ref: LexOrderingRef) -> Self { + Self::new(lex_ordering_ref.to_vec()) + } + + pub fn is_empty(&self) -> bool { + self.inner.is_empty() + } + + pub fn iter(&self) -> impl Iterator { + self.inner.iter() + } + + pub fn len(&self) -> usize { + self.inner.len() + } + + pub fn pop(&mut self) -> Option { + self.inner.pop() + } + + pub fn push(&mut self, physical_sort_expr: PhysicalSortExpr) { + self.inner.push(physical_sort_expr) + } + + pub fn retain(&mut self, f: impl FnMut(&PhysicalSortExpr) -> bool) { + self.inner.retain(f) + } + + pub fn truncate(&mut self, len: usize) { + self.inner.truncate(len) + } +} + +impl Deref for LexOrdering { + type Target = [PhysicalSortExpr]; + + fn deref(&self) -> &Self::Target { + self.inner.as_slice() + } +} + +impl Display for LexOrdering { + fn fmt(&self, f: &mut Formatter) -> 
fmt::Result { + let mut first = true; + for sort_expr in &self.inner { + if first { + first = false; + } else { + write!(f, ", ")?; + } + write!(f, "{}", sort_expr)?; + } + Ok(()) + } +} + +impl FromIterator for LexOrdering { + fn from_iter>(iter: T) -> Self { + let mut lex_ordering = LexOrdering::default(); + + for i in iter { + lex_ordering.push(i); + } + + lex_ordering + } +} + +impl Index for LexOrdering { + type Output = PhysicalSortExpr; + + fn index(&self, index: usize) -> &Self::Output { + &self.inner[index] + } +} + +impl Index> for LexOrdering { + type Output = [PhysicalSortExpr]; + + fn index(&self, range: Range) -> &Self::Output { + &self.inner[range] + } +} + +impl Index> for LexOrdering { + type Output = [PhysicalSortExpr]; + + fn index(&self, range_from: RangeFrom) -> &Self::Output { + &self.inner[range_from] + } +} + +impl Index> for LexOrdering { + type Output = [PhysicalSortExpr]; + + fn index(&self, range_to: RangeTo) -> &Self::Output { + &self.inner[range_to] + } +} + +impl IntoIterator for LexOrdering { + type Item = PhysicalSortExpr; + type IntoIter = IntoIter; + + fn into_iter(self) -> Self::IntoIter { + self.inner.into_iter() + } +} ///`LexOrderingRef` is an alias for the type &`[PhysicalSortExpr]`, which represents /// a reference to a lexicographical ordering. 
@@ -384,6 +503,10 @@ impl LexRequirement { Self { inner } } + pub fn is_empty(&self) -> bool { + self.inner.is_empty() + } + pub fn iter(&self) -> impl Iterator { self.inner.iter() } @@ -415,7 +538,7 @@ impl FromIterator for LexRequirement { impl IntoIterator for LexRequirement { type Item = PhysicalSortRequirement; - type IntoIter = std::vec::IntoIter; + type IntoIter = IntoIter; fn into_iter(self) -> Self::IntoIter { self.inner.into_iter() diff --git a/datafusion/physical-expr-common/src/utils.rs b/datafusion/physical-expr-common/src/utils.rs index d2c9bf1a2408..26293b1a76a2 100644 --- a/datafusion/physical-expr-common/src/utils.rs +++ b/datafusion/physical-expr-common/src/utils.rs @@ -24,7 +24,7 @@ use datafusion_common::Result; use datafusion_expr_common::sort_properties::ExprProperties; use crate::physical_expr::PhysicalExpr; -use crate::sort_expr::PhysicalSortExpr; +use crate::sort_expr::{LexOrdering, LexOrderingRef, PhysicalSortExpr}; use crate::tree_node::ExprContext; /// Represents a [`PhysicalExpr`] node with associated properties (order and @@ -96,7 +96,7 @@ pub fn scatter(mask: &BooleanArray, truthy: &dyn Array) -> Result { /// Reverses the ORDER BY expression, which is useful during equivalent window /// expression construction. For instance, 'ORDER BY a ASC, NULLS LAST' turns into /// 'ORDER BY a DESC, NULLS FIRST'. 
-pub fn reverse_order_bys(order_bys: &[PhysicalSortExpr]) -> Vec { +pub fn reverse_order_bys(order_bys: LexOrderingRef) -> LexOrdering { order_bys .iter() .map(|e| PhysicalSortExpr::new(e.expr.clone(), !e.options)) diff --git a/datafusion/physical-expr/src/aggregate.rs b/datafusion/physical-expr/src/aggregate.rs index 6330c240241a..e446776affc0 100644 --- a/datafusion/physical-expr/src/aggregate.rs +++ b/datafusion/physical-expr/src/aggregate.rs @@ -45,7 +45,7 @@ use datafusion_functions_aggregate_common::accumulator::AccumulatorArgs; use datafusion_functions_aggregate_common::accumulator::StateFieldsArgs; use datafusion_functions_aggregate_common::order::AggregateOrderSensitivity; use datafusion_physical_expr_common::physical_expr::PhysicalExpr; -use datafusion_physical_expr_common::sort_expr::{LexOrdering, PhysicalSortExpr}; +use datafusion_physical_expr_common::sort_expr::{LexOrdering, LexOrderingRef}; use datafusion_physical_expr_common::utils::reverse_order_bys; use datafusion_expr_common::groups_accumulator::GroupsAccumulator; @@ -81,7 +81,7 @@ impl AggregateExprBuilder { args, alias: None, schema: Arc::new(Schema::empty()), - ordering_req: vec![], + ordering_req: LexOrdering::default(), ignore_nulls: false, is_distinct: false, is_reversed: false, @@ -111,7 +111,8 @@ impl AggregateExprBuilder { .map(|e| e.expr.data_type(&schema)) .collect::>>()?; - ordering_fields = utils::ordering_fields(&ordering_req, &ordering_types); + ordering_fields = + utils::ordering_fields(ordering_req.as_ref(), &ordering_types); } let input_exprs_types = args @@ -265,7 +266,7 @@ impl AggregateFunctionExpr { return_type: &self.data_type, schema: &self.schema, ignore_nulls: self.ignore_nulls, - ordering_req: &self.ordering_req, + ordering_req: self.ordering_req.as_ref(), is_distinct: self.is_distinct, name: &self.name, is_reversed: self.is_reversed, @@ -291,13 +292,13 @@ impl AggregateFunctionExpr { /// Order by requirements for the aggregate function /// By default it is `None` 
(there is no requirement) /// Order-sensitive aggregators, such as `FIRST_VALUE(x ORDER BY y)` should implement this - pub fn order_bys(&self) -> Option<&[PhysicalSortExpr]> { + pub fn order_bys(&self) -> Option { if self.ordering_req.is_empty() { return None; } if !self.order_sensitivity().is_insensitive() { - return Some(&self.ordering_req); + return Some(self.ordering_req.as_ref()); } None @@ -340,7 +341,7 @@ impl AggregateFunctionExpr { }; AggregateExprBuilder::new(Arc::new(updated_fn), self.args.to_vec()) - .order_by(self.ordering_req.to_vec()) + .order_by(self.ordering_req.clone()) .schema(Arc::new(self.schema.clone())) .alias(self.name().to_string()) .with_ignore_nulls(self.ignore_nulls) @@ -356,7 +357,7 @@ impl AggregateFunctionExpr { return_type: &self.data_type, schema: &self.schema, ignore_nulls: self.ignore_nulls, - ordering_req: &self.ordering_req, + ordering_req: self.ordering_req.as_ref(), is_distinct: self.is_distinct, name: &self.name, is_reversed: self.is_reversed, @@ -425,7 +426,7 @@ impl AggregateFunctionExpr { return_type: &self.data_type, schema: &self.schema, ignore_nulls: self.ignore_nulls, - ordering_req: &self.ordering_req, + ordering_req: self.ordering_req.as_ref(), is_distinct: self.is_distinct, name: &self.name, is_reversed: self.is_reversed, @@ -444,7 +445,7 @@ impl AggregateFunctionExpr { return_type: &self.data_type, schema: &self.schema, ignore_nulls: self.ignore_nulls, - ordering_req: &self.ordering_req, + ordering_req: self.ordering_req.as_ref(), is_distinct: self.is_distinct, name: &self.name, is_reversed: self.is_reversed, @@ -462,7 +463,7 @@ impl AggregateFunctionExpr { ReversedUDAF::NotSupported => None, ReversedUDAF::Identical => Some(self.clone()), ReversedUDAF::Reversed(reverse_udf) => { - let reverse_ordering_req = reverse_order_bys(&self.ordering_req); + let reverse_ordering_req = reverse_order_bys(self.ordering_req.as_ref()); let mut name = self.name().to_string(); // If the function is changed, we need to reverse 
order_by clause as well // i.e. First(a order by b asc null first) -> Last(a order by b desc null last) @@ -473,7 +474,7 @@ impl AggregateFunctionExpr { replace_fn_name_clause(&mut name, self.fun.name(), reverse_udf.name()); AggregateExprBuilder::new(reverse_udf, self.args.to_vec()) - .order_by(reverse_ordering_req.to_vec()) + .order_by(reverse_ordering_req) .schema(Arc::new(self.schema.clone())) .alias(name) .with_ignore_nulls(self.ignore_nulls) @@ -489,7 +490,7 @@ impl AggregateFunctionExpr { /// These expressions are (1)function arguments, (2) order by expressions. pub fn all_expressions(&self) -> AggregatePhysicalExpressions { let args = self.expressions(); - let order_bys = self.order_bys().unwrap_or(&[]); + let order_bys = self.order_bys().unwrap_or_default(); let order_by_exprs = order_bys .iter() .map(|sort_expr| Arc::clone(&sort_expr.expr)) diff --git a/datafusion/physical-expr/src/equivalence/class.rs b/datafusion/physical-expr/src/equivalence/class.rs index c1851ddb22b5..7305bc1b0a2b 100644 --- a/datafusion/physical-expr/src/equivalence/class.rs +++ b/datafusion/physical-expr/src/equivalence/class.rs @@ -632,7 +632,7 @@ impl EquivalenceGroup { } result } - JoinType::LeftSemi | JoinType::LeftAnti => self.clone(), + JoinType::LeftSemi | JoinType::LeftAnti | JoinType::LeftMark => self.clone(), JoinType::RightSemi | JoinType::RightAnti => right_equivalences.clone(), } } diff --git a/datafusion/physical-expr/src/equivalence/mod.rs b/datafusion/physical-expr/src/equivalence/mod.rs index 95bb93d6ca57..902e53a7f236 100644 --- a/datafusion/physical-expr/src/equivalence/mod.rs +++ b/datafusion/physical-expr/src/equivalence/mod.rs @@ -80,6 +80,7 @@ mod tests { use arrow::datatypes::{DataType, Field, Schema}; use arrow_schema::{SchemaRef, SortOptions}; use datafusion_common::{plan_datafusion_err, Result}; + use datafusion_physical_expr_common::sort_expr::LexOrdering; pub fn output_schema( mapping: &ProjectionMapping, @@ -184,7 +185,7 @@ mod tests { // Convert each 
tuple to PhysicalSortExpr pub fn convert_to_sort_exprs( in_data: &[(&Arc, SortOptions)], - ) -> Vec { + ) -> LexOrdering { in_data .iter() .map(|(expr, options)| PhysicalSortExpr { @@ -197,7 +198,7 @@ mod tests { // Convert each inner tuple to PhysicalSortExpr pub fn convert_to_orderings( orderings: &[Vec<(&Arc, SortOptions)>], - ) -> Vec> { + ) -> Vec { orderings .iter() .map(|sort_exprs| convert_to_sort_exprs(sort_exprs)) @@ -207,20 +208,22 @@ mod tests { // Convert each tuple to PhysicalSortExpr pub fn convert_to_sort_exprs_owned( in_data: &[(Arc, SortOptions)], - ) -> Vec { - in_data - .iter() - .map(|(expr, options)| PhysicalSortExpr { - expr: Arc::clone(expr), - options: *options, - }) - .collect() + ) -> LexOrdering { + LexOrdering::new( + in_data + .iter() + .map(|(expr, options)| PhysicalSortExpr { + expr: Arc::clone(expr), + options: *options, + }) + .collect(), + ) } // Convert each inner tuple to PhysicalSortExpr pub fn convert_to_orderings_owned( orderings: &[Vec<(Arc, SortOptions)>], - ) -> Vec> { + ) -> Vec { orderings .iter() .map(|sort_exprs| convert_to_sort_exprs_owned(sort_exprs)) diff --git a/datafusion/physical-expr/src/equivalence/ordering.rs b/datafusion/physical-expr/src/equivalence/ordering.rs index d71f3b037fb1..838c9800f942 100644 --- a/datafusion/physical-expr/src/equivalence/ordering.rs +++ b/datafusion/physical-expr/src/equivalence/ordering.rs @@ -21,7 +21,7 @@ use std::sync::Arc; use std::vec::IntoIter; use crate::equivalence::add_offset_to_expr; -use crate::{LexOrdering, PhysicalExpr, PhysicalSortExpr}; +use crate::{LexOrdering, PhysicalExpr}; use arrow_schema::SortOptions; /// An `OrderingEquivalenceClass` object keeps track of different alternative @@ -146,7 +146,12 @@ impl OrderingEquivalenceClass { /// Returns the concatenation of all the orderings. This enables merge /// operations to preserve all equivalent orderings simultaneously. 
pub fn output_ordering(&self) -> Option { - let output_ordering = self.orderings.iter().flatten().cloned().collect(); + let output_ordering = self + .orderings + .iter() + .flat_map(|ordering| ordering.as_ref()) + .cloned() + .collect(); let output_ordering = collapse_lex_ordering(output_ordering); (!output_ordering.is_empty()).then_some(output_ordering) } @@ -169,7 +174,7 @@ impl OrderingEquivalenceClass { for idx in 0..n_ordering { // Calculate cross product index let idx = outer_idx * n_ordering + idx; - self.orderings[idx].extend(ordering.iter().cloned()); + self.orderings[idx].inner.extend(ordering.iter().cloned()); } } self @@ -179,7 +184,7 @@ impl OrderingEquivalenceClass { /// ordering equivalence class. pub fn add_offset(&mut self, offset: usize) { for ordering in self.orderings.iter_mut() { - for sort_expr in ordering { + for sort_expr in ordering.inner.iter_mut() { sort_expr.expr = add_offset_to_expr(Arc::clone(&sort_expr.expr), offset); } } @@ -211,10 +216,10 @@ impl IntoIterator for OrderingEquivalenceClass { /// duplicate entries that have same physical expression inside. For example, /// `vec![a ASC, a DESC]` collapses to `vec![a ASC]`. 
pub fn collapse_lex_ordering(input: LexOrdering) -> LexOrdering { - let mut output = Vec::::new(); - for item in input { + let mut output = LexOrdering::default(); + for item in input.iter() { if !output.iter().any(|req| req.expr.eq(&item.expr)) { - output.push(item); + output.push(item.clone()); } } output @@ -239,10 +244,10 @@ impl Display for OrderingEquivalenceClass { write!(f, "[")?; let mut iter = self.orderings.iter(); if let Some(ordering) = iter.next() { - write!(f, "[{}]", PhysicalSortExpr::format_list(ordering))?; + write!(f, "[{}]", ordering)?; } for ordering in iter { - write!(f, ", [{}]", PhysicalSortExpr::format_list(ordering))?; + write!(f, ", [{}]", ordering)?; } write!(f, "]")?; Ok(()) @@ -268,6 +273,7 @@ mod tests { use arrow_schema::SortOptions; use datafusion_common::{DFSchema, Result}; use datafusion_expr::{Operator, ScalarUDF}; + use datafusion_physical_expr_common::sort_expr::LexOrdering; #[test] fn test_ordering_satisfy() -> Result<()> { @@ -275,11 +281,11 @@ mod tests { Field::new("a", DataType::Int64, true), Field::new("b", DataType::Int64, true), ])); - let crude = vec![PhysicalSortExpr { + let crude = LexOrdering::new(vec![PhysicalSortExpr { expr: Arc::new(Column::new("a", 0)), options: SortOptions::default(), - }]; - let finer = vec![ + }]); + let finer = LexOrdering::new(vec![ PhysicalSortExpr { expr: Arc::new(Column::new("a", 0)), options: SortOptions::default(), @@ -288,18 +294,18 @@ mod tests { expr: Arc::new(Column::new("b", 1)), options: SortOptions::default(), }, - ]; + ]); // finer ordering satisfies, crude ordering should return true let mut eq_properties_finer = EquivalenceProperties::new(Arc::clone(&input_schema)); eq_properties_finer.oeq_class.push(finer.clone()); - assert!(eq_properties_finer.ordering_satisfy(&crude)); + assert!(eq_properties_finer.ordering_satisfy(crude.as_ref())); // Crude ordering doesn't satisfy finer ordering. 
should return false let mut eq_properties_crude = EquivalenceProperties::new(Arc::clone(&input_schema)); eq_properties_crude.oeq_class.push(crude); - assert!(!eq_properties_crude.ordering_satisfy(&finer)); + assert!(!eq_properties_crude.ordering_satisfy(finer.as_ref())); Ok(()) } @@ -589,7 +595,7 @@ mod tests { let reqs = convert_to_sort_exprs(&reqs); assert_eq!( - eq_properties.ordering_satisfy(&reqs), + eq_properties.ordering_satisfy(reqs.as_ref()), expected, "{}", err_msg @@ -649,7 +655,7 @@ mod tests { format!("error in test reqs: {:?}, expected: {:?}", reqs, expected,); let reqs = convert_to_sort_exprs(&reqs); assert_eq!( - eq_properties.ordering_satisfy(&reqs), + eq_properties.ordering_satisfy(reqs.as_ref()), expected, "{}", err_msg diff --git a/datafusion/physical-expr/src/equivalence/properties.rs b/datafusion/physical-expr/src/equivalence/properties.rs index 9a16b205ae25..55c99e93d040 100644 --- a/datafusion/physical-expr/src/equivalence/properties.rs +++ b/datafusion/physical-expr/src/equivalence/properties.rs @@ -103,7 +103,7 @@ use itertools::Itertools; /// # use arrow_schema::{Schema, Field, DataType, SchemaRef}; /// # use datafusion_physical_expr::{ConstExpr, EquivalenceProperties}; /// # use datafusion_physical_expr::expressions::col; -/// use datafusion_physical_expr_common::sort_expr::PhysicalSortExpr; +/// use datafusion_physical_expr_common::sort_expr::{LexOrdering, PhysicalSortExpr}; /// # let schema: SchemaRef = Arc::new(Schema::new(vec![ /// # Field::new("a", DataType::Int32, false), /// # Field::new("b", DataType::Int32, false), @@ -116,12 +116,12 @@ use itertools::Itertools; /// // with a single constant value of b /// let mut eq_properties = EquivalenceProperties::new(schema) /// .with_constants(vec![ConstExpr::from(col_b)]); -/// eq_properties.add_new_ordering(vec![ +/// eq_properties.add_new_ordering(LexOrdering::new(vec![ /// PhysicalSortExpr::new_default(col_a).asc(), /// PhysicalSortExpr::new_default(col_c).desc(), -/// ]); +/// ])); 
/// -/// assert_eq!(eq_properties.to_string(), "order: [[a@0 ASC,c@2 DESC]], const: [b@1]") +/// assert_eq!(eq_properties.to_string(), "order: [[a@0 ASC, c@2 DESC]], const: [b@1]") /// ``` #[derive(Debug, Clone)] pub struct EquivalenceProperties { @@ -185,6 +185,7 @@ impl EquivalenceProperties { let mut output_ordering = self.oeq_class().output_ordering().unwrap_or_default(); // Prune out constant expressions output_ordering + .inner .retain(|sort_expr| !const_exprs_contains(constants, &sort_expr.expr)); (!output_ordering.is_empty()).then_some(output_ordering) } @@ -196,7 +197,7 @@ impl EquivalenceProperties { OrderingEquivalenceClass::new( self.oeq_class .iter() - .map(|ordering| self.normalize_sort_exprs(ordering)) + .map(|ordering| self.normalize_sort_exprs(ordering.as_ref())) .collect(), ) } @@ -351,7 +352,7 @@ impl EquivalenceProperties { .iter() .filter(|ordering| ordering[0].expr.eq(&normalized_expr)) // First expression after leading ordering - .filter_map(|ordering| Some(ordering).zip(ordering.get(1))) + .filter_map(|ordering| Some(ordering).zip(ordering.inner.get(1))) { let leading_ordering = ordering[0].options; // Currently, we only handle expressions with a single child. @@ -378,7 +379,7 @@ impl EquivalenceProperties { // then we can deduce that ordering `[b ASC]` is also valid. // Hence, ordering `[b ASC]` can be added to the state as valid ordering. // (e.g. existing ordering where leading ordering is removed) - new_orderings.push(ordering[1..].to_vec()); + new_orderings.push(LexOrdering::new(ordering[1..].to_vec())); break; } } @@ -391,7 +392,7 @@ impl EquivalenceProperties { /// Updates the ordering equivalence group within assuming that the table /// is re-sorted according to the argument `sort_exprs`. Note that constants /// and equivalence classes are unchanged as they are unaffected by a re-sort. 
- pub fn with_reorder(mut self, sort_exprs: Vec) -> Self { + pub fn with_reorder(mut self, sort_exprs: LexOrdering) -> Self { // TODO: In some cases, existing ordering equivalences may still be valid add this analysis. self.oeq_class = OrderingEquivalenceClass::new(vec![sort_exprs]); self @@ -605,8 +606,8 @@ impl EquivalenceProperties { pub fn substitute_ordering_component( &self, mapping: &ProjectionMapping, - sort_expr: &[PhysicalSortExpr], - ) -> Result>> { + sort_expr: LexOrderingRef, + ) -> Result> { let new_orderings = sort_expr .iter() .map(|sort_expr| { @@ -616,7 +617,7 @@ impl EquivalenceProperties { .filter(|source| expr_refers(source, &sort_expr.expr)) .cloned() .collect(); - let mut res = vec![sort_expr.clone()]; + let mut res = LexOrdering::new(vec![sort_expr.clone()]); // TODO: Add one-to-ones analysis for ScalarFunctions. for r_expr in referring_exprs { // we check whether this expression is substitutable or not @@ -639,7 +640,9 @@ impl EquivalenceProperties { // Generate all valid orderings, given substituted expressions. let res = new_orderings .into_iter() + .map(|ordering| ordering.inner) .multi_cartesian_product() + .map(LexOrdering::new) .collect::>(); Ok(res) } @@ -653,7 +656,7 @@ impl EquivalenceProperties { let orderings = &self.oeq_class.orderings; let new_order = orderings .iter() - .map(|order| self.substitute_ordering_component(mapping, order)) + .map(|order| self.substitute_ordering_component(mapping, order.as_ref())) .collect::>>()?; let new_order = new_order.into_iter().flatten().collect(); self.oeq_class = OrderingEquivalenceClass::new(new_order); @@ -836,7 +839,7 @@ impl EquivalenceProperties { if prefixes.is_empty() { // If prefix is empty, there is no dependency. 
Insert // empty ordering: - prefixes = vec![vec![]]; + prefixes = vec![LexOrdering::default()]; } // Append current ordering on top its dependencies: for ordering in prefixes.iter_mut() { @@ -986,7 +989,8 @@ impl EquivalenceProperties { // Add new ordered section to the state. result.extend(ordered_exprs); } - result.into_iter().unzip() + let (left, right) = result.into_iter().unzip(); + (LexOrdering::new(left), right) } /// This function determines whether the provided expression is constant @@ -1076,6 +1080,7 @@ impl EquivalenceProperties { let mut new_orderings = vec![]; for ordering in self.oeq_class.orderings { let new_ordering = ordering + .inner .into_iter() .map(|mut sort_expr| { sort_expr.expr = with_new_schema(sort_expr.expr, &schema)?; @@ -1313,7 +1318,7 @@ fn construct_prefix_orderings( /// Generates all possible orderings where dependencies are satisfied for the /// current projection expression. /// -/// # Examaple +/// # Example /// If `dependences` is `a + b ASC` and the dependency map holds dependencies /// * `a ASC` --> `[c ASC]` /// * `b ASC` --> `[d DESC]`, @@ -1348,7 +1353,7 @@ fn generate_dependency_orderings( // No dependency, dependent is a leading ordering. if relevant_prefixes.is_empty() { // Return an empty ordering: - return vec![vec![]]; + return vec![LexOrdering::default()]; } relevant_prefixes @@ -1358,7 +1363,12 @@ fn generate_dependency_orderings( prefix_orderings .iter() .permutations(prefix_orderings.len()) - .map(|prefixes| prefixes.into_iter().flatten().cloned().collect()) + .map(|prefixes| { + prefixes + .into_iter() + .flat_map(|ordering| ordering.inner.clone()) + .collect() + }) .collect::>() }) .collect() @@ -1651,7 +1661,7 @@ impl<'a> DependencyEnumerator<'a> { // An empty dependency means the referred_sort_expr represents a global ordering. // Return its projected version, which is the target_expression. 
if node.dependencies.is_empty() { - return vec![vec![target_sort_expr.clone()]]; + return vec![LexOrdering::new(vec![target_sort_expr.clone()])]; }; node.dependencies @@ -1961,7 +1971,7 @@ impl UnionEquivalentOrderingBuilder { ) -> AddedOrdering { if ordering.is_empty() { AddedOrdering::Yes - } else if constants.is_empty() && properties.ordering_satisfy(&ordering) { + } else if constants.is_empty() && properties.ordering_satisfy(ordering.as_ref()) { // If the ordering satisfies the target properties, no need to // augment it with constants. self.orderings.push(ordering); @@ -2002,7 +2012,7 @@ impl UnionEquivalentOrderingBuilder { &properties.constants, ) { if !augmented_ordering.is_empty() { - assert!(properties.ordering_satisfy(&augmented_ordering)); + assert!(properties.ordering_satisfy(augmented_ordering.as_ref())); self.orderings.push(augmented_ordering); } } @@ -2022,9 +2032,9 @@ impl UnionEquivalentOrderingBuilder { existing_ordering: &LexOrdering, existing_constants: &[ConstExpr], ) -> Option { - let mut augmented_ordering = vec![]; - let mut sort_expr_iter = ordering.iter().peekable(); - let mut existing_sort_expr_iter = existing_ordering.iter().peekable(); + let mut augmented_ordering = LexOrdering::default(); + let mut sort_expr_iter = ordering.inner.iter().peekable(); + let mut existing_sort_expr_iter = existing_ordering.inner.iter().peekable(); // walk in parallel down the two orderings, trying to match them up while sort_expr_iter.peek().is_some() || existing_sort_expr_iter.peek().is_some() @@ -2170,20 +2180,20 @@ mod tests { let mut input_properties = EquivalenceProperties::new(Arc::clone(&input_schema)); // add equivalent ordering [a, b, c, d] - input_properties.add_new_ordering(vec![ + input_properties.add_new_ordering(LexOrdering::new(vec![ parse_sort_expr("a", &input_schema), parse_sort_expr("b", &input_schema), parse_sort_expr("c", &input_schema), parse_sort_expr("d", &input_schema), - ]); + ])); // add equivalent ordering [a, c, b, d] - 
input_properties.add_new_ordering(vec![ + input_properties.add_new_ordering(LexOrdering::new(vec![ parse_sort_expr("a", &input_schema), parse_sort_expr("c", &input_schema), parse_sort_expr("b", &input_schema), // NB b and c are swapped parse_sort_expr("d", &input_schema), - ]); + ])); // simply project all the columns in order let proj_exprs = vec![ @@ -2197,7 +2207,7 @@ mod tests { assert_eq!( out_properties.to_string(), - "order: [[a@0 ASC,c@2 ASC,b@1 ASC,d@3 ASC], [a@0 ASC,b@1 ASC,c@2 ASC,d@3 ASC]]" + "order: [[a@0 ASC, c@2 ASC, b@1 ASC, d@3 ASC], [a@0 ASC, b@1 ASC, c@2 ASC, d@3 ASC]]" ); Ok(()) @@ -2403,27 +2413,27 @@ mod tests { eq_properties.add_equal_conditions(&col_a_expr, &col_c_expr)?; let others = vec![ - vec![PhysicalSortExpr { + LexOrdering::new(vec![PhysicalSortExpr { expr: Arc::clone(&col_b_expr), options: sort_options, - }], - vec![PhysicalSortExpr { + }]), + LexOrdering::new(vec![PhysicalSortExpr { expr: Arc::clone(&col_c_expr), options: sort_options, - }], + }]), ]; eq_properties.add_new_orderings(others); let mut expected_eqs = EquivalenceProperties::new(Arc::new(schema)); expected_eqs.add_new_orderings([ - vec![PhysicalSortExpr { + LexOrdering::new(vec![PhysicalSortExpr { expr: Arc::clone(&col_b_expr), options: sort_options, - }], - vec![PhysicalSortExpr { + }]), + LexOrdering::new(vec![PhysicalSortExpr { expr: Arc::clone(&col_c_expr), options: sort_options, - }], + }]), ]); let oeq_class = eq_properties.oeq_class().clone(); @@ -2446,7 +2456,7 @@ mod tests { let col_b = &col("b", &schema)?; let required_columns = [Arc::clone(col_b), Arc::clone(col_a)]; let mut eq_properties = EquivalenceProperties::new(Arc::new(schema)); - eq_properties.add_new_orderings([vec![ + eq_properties.add_new_orderings([LexOrdering::new(vec![ PhysicalSortExpr { expr: Arc::new(Column::new("b", 1)), options: sort_options_not, @@ -2455,12 +2465,12 @@ mod tests { expr: Arc::new(Column::new("a", 0)), options: sort_options, }, - ]]); + ])]); let (result, idxs) = 
eq_properties.find_longest_permutation(&required_columns); assert_eq!(idxs, vec![0, 1]); assert_eq!( result, - vec![ + LexOrdering::new(vec![ PhysicalSortExpr { expr: Arc::clone(col_b), options: sort_options_not @@ -2469,7 +2479,7 @@ mod tests { expr: Arc::clone(col_a), options: sort_options } - ] + ]) ); let schema = Schema::new(vec![ @@ -2482,11 +2492,11 @@ mod tests { let required_columns = [Arc::clone(col_b), Arc::clone(col_a)]; let mut eq_properties = EquivalenceProperties::new(Arc::new(schema)); eq_properties.add_new_orderings([ - vec![PhysicalSortExpr { + LexOrdering::new(vec![PhysicalSortExpr { expr: Arc::new(Column::new("c", 2)), options: sort_options, - }], - vec![ + }]), + LexOrdering::new(vec![ PhysicalSortExpr { expr: Arc::new(Column::new("b", 1)), options: sort_options_not, @@ -2495,13 +2505,13 @@ mod tests { expr: Arc::new(Column::new("a", 0)), options: sort_options, }, - ], + ]), ]); let (result, idxs) = eq_properties.find_longest_permutation(&required_columns); assert_eq!(idxs, vec![0, 1]); assert_eq!( result, - vec![ + LexOrdering::new(vec![ PhysicalSortExpr { expr: Arc::clone(col_b), options: sort_options_not @@ -2510,7 +2520,7 @@ mod tests { expr: Arc::clone(col_a), options: sort_options } - ] + ]) ); let required_columns = [ @@ -2525,7 +2535,7 @@ mod tests { let mut eq_properties = EquivalenceProperties::new(Arc::new(schema)); // not satisfied orders - eq_properties.add_new_orderings([vec![ + eq_properties.add_new_orderings([LexOrdering::new(vec![ PhysicalSortExpr { expr: Arc::new(Column::new("b", 1)), options: sort_options_not, @@ -2538,7 +2548,7 @@ mod tests { expr: Arc::new(Column::new("a", 0)), options: sort_options, }, - ]]); + ])]); let (_, idxs) = eq_properties.find_longest_permutation(&required_columns); assert_eq!(idxs, vec![0]); @@ -2567,14 +2577,14 @@ mod tests { eq_properties.add_equal_conditions(col_b, col_a)?; // [b ASC], [d ASC] eq_properties.add_new_orderings(vec![ - vec![PhysicalSortExpr { + 
LexOrdering::new(vec![PhysicalSortExpr { expr: Arc::clone(col_b), options: option_asc, - }], - vec![PhysicalSortExpr { + }]), + LexOrdering::new(vec![PhysicalSortExpr { expr: Arc::clone(col_d), options: option_asc, - }], + }]), ]); let test_cases = vec![ @@ -2605,7 +2615,7 @@ mod tests { let leading_orderings = eq_properties .oeq_class() .iter() - .flat_map(|ordering| ordering.first().cloned()) + .flat_map(|ordering| ordering.inner.first().cloned()) .collect::>(); let expr_props = eq_properties.get_expr_properties(Arc::clone(&expr)); let err_msg = format!( @@ -2649,7 +2659,7 @@ mod tests { nulls_first: true, }; // [d ASC, h DESC] also satisfies schema. - eq_properties.add_new_orderings([vec![ + eq_properties.add_new_orderings([LexOrdering::new(vec![ PhysicalSortExpr { expr: Arc::clone(col_d), options: option_asc, @@ -2658,7 +2668,7 @@ mod tests { expr: Arc::clone(col_h), options: option_desc, }, - ]]); + ])]); let test_cases = vec![ // TEST CASE 1 (vec![col_a], vec![(col_a, option_asc)]), @@ -2940,7 +2950,7 @@ mod tests { Field::new("c", DataType::Timestamp(TimeUnit::Nanosecond, None), true), ])); let base_properties = EquivalenceProperties::new(Arc::clone(&schema)) - .with_reorder( + .with_reorder(LexOrdering::new( ["a", "b", "c"] .into_iter() .map(|c| { @@ -2953,7 +2963,7 @@ mod tests { }) }) .collect::>>()?, - ); + )); struct TestCase { name: &'static str, @@ -3042,10 +3052,10 @@ mod tests { options: SortOptions::default(), }) }) - .collect::>>()?; + .collect::>()?; assert_eq!( - properties.ordering_satisfy(&sort), + properties.ordering_satisfy(sort.as_ref()), case.should_satisfy_ordering, "failed test '{}'", case.name @@ -3564,7 +3574,7 @@ mod tests { ordering .iter() .map(|name| parse_sort_expr(name, schema)) - .collect::>() + .collect::() }) .collect::>(); diff --git a/datafusion/physical-expr/src/expressions/not.rs b/datafusion/physical-expr/src/expressions/not.rs index b69954e00bba..6d91e9dfdd36 100644 --- a/datafusion/physical-expr/src/expressions/not.rs 
+++ b/datafusion/physical-expr/src/expressions/not.rs @@ -27,6 +27,7 @@ use crate::PhysicalExpr; use arrow::datatypes::{DataType, Schema}; use arrow::record_batch::RecordBatch; use datafusion_common::{cast::as_boolean_array, Result, ScalarValue}; +use datafusion_expr::interval_arithmetic::Interval; use datafusion_expr::ColumnarValue; /// Not expression @@ -100,6 +101,10 @@ impl PhysicalExpr for NotExpr { Ok(Arc::new(NotExpr::new(Arc::clone(&children[0])))) } + fn evaluate_bounds(&self, children: &[&Interval]) -> Result { + children[0].not() + } + fn dyn_hash(&self, state: &mut dyn Hasher) { let mut s = state; self.hash(&mut s); @@ -125,10 +130,11 @@ mod tests { use super::*; use crate::expressions::col; use arrow::{array::BooleanArray, datatypes::*}; + use std::sync::OnceLock; #[test] fn neg_op() -> Result<()> { - let schema = Schema::new(vec![Field::new("a", DataType::Boolean, true)]); + let schema = schema(); let expr = not(col("a", &schema)?)?; assert_eq!(expr.data_type(&schema)?, DataType::Boolean); @@ -137,8 +143,7 @@ mod tests { let input = BooleanArray::from(vec![Some(true), None, Some(false)]); let expected = &BooleanArray::from(vec![Some(false), None, Some(true)]); - let batch = - RecordBatch::try_new(Arc::new(schema.clone()), vec![Arc::new(input)])?; + let batch = RecordBatch::try_new(schema, vec![Arc::new(input)])?; let result = expr .evaluate(&batch)? 
@@ -150,4 +155,48 @@ mod tests { Ok(()) } + + #[test] + fn test_evaluate_bounds() -> Result<()> { + // Note that `None` for boolean intervals is converted to `Some(false)` + // / `Some(true)` by `Interval::make`, so it is not explicitly tested + // here + + // if the bounds are all booleans (false, true) so is the negation + assert_evaluate_bounds( + Interval::make(Some(false), Some(true))?, + Interval::make(Some(false), Some(true))?, + )?; + // (true, false) is not tested because it is not a valid interval (lower + // bound is greater than upper bound) + assert_evaluate_bounds( + Interval::make(Some(true), Some(true))?, + Interval::make(Some(false), Some(false))?, + )?; + assert_evaluate_bounds( + Interval::make(Some(false), Some(false))?, + Interval::make(Some(true), Some(true))?, + )?; + Ok(()) + } + + fn assert_evaluate_bounds( + interval: Interval, + expected_interval: Interval, + ) -> Result<()> { + let not_expr = not(col("a", &schema())?)?; + assert_eq!( + not_expr.evaluate_bounds(&[&interval]).unwrap(), + expected_interval + ); + Ok(()) + } + + fn schema() -> SchemaRef { + Arc::clone(SCHEMA.get_or_init(|| { + Arc::new(Schema::new(vec![Field::new("a", DataType::Boolean, true)])) + })) + } + + static SCHEMA: OnceLock = OnceLock::new(); } diff --git a/datafusion/physical-expr/src/utils/mod.rs b/datafusion/physical-expr/src/utils/mod.rs index 4bd022975ac3..c3d1b1425b7f 100644 --- a/datafusion/physical-expr/src/utils/mod.rs +++ b/datafusion/physical-expr/src/utils/mod.rs @@ -35,6 +35,7 @@ use datafusion_common::tree_node::{ use datafusion_common::Result; use datafusion_expr::Operator; +use datafusion_physical_expr_common::sort_expr::{LexOrdering, LexOrderingRef}; use itertools::Itertools; use petgraph::graph::NodeIndex; use petgraph::stable_graph::StableGraph; @@ -245,10 +246,7 @@ pub fn reassign_predicate_columns( } /// Merge left and right sort expressions, checking for duplicates. 
-pub fn merge_vectors( - left: &[PhysicalSortExpr], - right: &[PhysicalSortExpr], -) -> Vec { +pub fn merge_vectors(left: LexOrderingRef, right: LexOrderingRef) -> LexOrdering { left.iter() .cloned() .chain(right.iter().cloned()) diff --git a/datafusion/physical-expr/src/window/aggregate.rs b/datafusion/physical-expr/src/window/aggregate.rs index 3fe5d842dfd1..94960c95e4bb 100644 --- a/datafusion/physical-expr/src/window/aggregate.rs +++ b/datafusion/physical-expr/src/window/aggregate.rs @@ -25,16 +25,16 @@ use arrow::array::Array; use arrow::record_batch::RecordBatch; use arrow::{array::ArrayRef, datatypes::Field}; -use datafusion_common::ScalarValue; -use datafusion_common::{DataFusionError, Result}; -use datafusion_expr::{Accumulator, WindowFrame}; - use crate::aggregate::AggregateFunctionExpr; use crate::window::window_expr::AggregateWindowExpr; use crate::window::{ PartitionBatches, PartitionWindowAggStates, SlidingAggregateWindowExpr, WindowExpr, }; -use crate::{expressions::PhysicalSortExpr, reverse_order_bys, PhysicalExpr}; +use crate::{reverse_order_bys, PhysicalExpr}; +use datafusion_common::ScalarValue; +use datafusion_common::{DataFusionError, Result}; +use datafusion_expr::{Accumulator, WindowFrame}; +use datafusion_physical_expr_common::sort_expr::{LexOrdering, LexOrderingRef}; /// A window expr that takes the form of an aggregate function. 
/// @@ -43,7 +43,7 @@ use crate::{expressions::PhysicalSortExpr, reverse_order_bys, PhysicalExpr}; pub struct PlainAggregateWindowExpr { aggregate: Arc, partition_by: Vec>, - order_by: Vec, + order_by: LexOrdering, window_frame: Arc, } @@ -52,13 +52,13 @@ impl PlainAggregateWindowExpr { pub fn new( aggregate: Arc, partition_by: &[Arc], - order_by: &[PhysicalSortExpr], + order_by: LexOrderingRef, window_frame: Arc, ) -> Self { Self { aggregate, partition_by: partition_by.to_vec(), - order_by: order_by.to_vec(), + order_by: LexOrdering::from_ref(order_by), window_frame, } } @@ -124,8 +124,8 @@ impl WindowExpr for PlainAggregateWindowExpr { &self.partition_by } - fn order_by(&self) -> &[PhysicalSortExpr] { - &self.order_by + fn order_by(&self) -> LexOrderingRef { + self.order_by.as_ref() } fn get_window_frame(&self) -> &Arc { @@ -139,14 +139,14 @@ impl WindowExpr for PlainAggregateWindowExpr { Arc::new(PlainAggregateWindowExpr::new( Arc::new(reverse_expr), &self.partition_by.clone(), - &reverse_order_bys(&self.order_by), + reverse_order_bys(self.order_by.as_ref()).as_ref(), Arc::new(self.window_frame.reverse()), )) as _ } else { Arc::new(SlidingAggregateWindowExpr::new( Arc::new(reverse_expr), &self.partition_by.clone(), - &reverse_order_bys(&self.order_by), + reverse_order_bys(self.order_by.as_ref()).as_ref(), Arc::new(self.window_frame.reverse()), )) as _ } diff --git a/datafusion/physical-expr/src/window/built_in.rs b/datafusion/physical-expr/src/window/built_in.rs index 8ff277db37df..5f6c5e5c2c1b 100644 --- a/datafusion/physical-expr/src/window/built_in.rs +++ b/datafusion/physical-expr/src/window/built_in.rs @@ -22,7 +22,6 @@ use std::ops::Range; use std::sync::Arc; use super::{BuiltInWindowFunctionExpr, WindowExpr}; -use crate::expressions::PhysicalSortExpr; use crate::window::window_expr::{get_orderby_values, WindowFn}; use crate::window::{PartitionBatches, PartitionWindowAggStates, WindowState}; use crate::{reverse_order_bys, EquivalenceProperties, 
PhysicalExpr}; @@ -34,13 +33,14 @@ use datafusion_common::utils::evaluate_partition_ranges; use datafusion_common::{Result, ScalarValue}; use datafusion_expr::window_state::{WindowAggState, WindowFrameContext}; use datafusion_expr::WindowFrame; +use datafusion_physical_expr_common::sort_expr::{LexOrdering, LexOrderingRef}; /// A window expr that takes the form of a [`BuiltInWindowFunctionExpr`]. #[derive(Debug)] pub struct BuiltInWindowExpr { expr: Arc, partition_by: Vec>, - order_by: Vec, + order_by: LexOrdering, window_frame: Arc, } @@ -49,13 +49,13 @@ impl BuiltInWindowExpr { pub fn new( expr: Arc, partition_by: &[Arc], - order_by: &[PhysicalSortExpr], + order_by: LexOrderingRef, window_frame: Arc, ) -> Self { Self { expr, partition_by: partition_by.to_vec(), - order_by: order_by.to_vec(), + order_by: LexOrdering::from_ref(order_by), window_frame, } } @@ -76,7 +76,8 @@ impl BuiltInWindowExpr { if let Some(fn_res_ordering) = self.expr.get_result_ordering(schema) { if self.partition_by.is_empty() { // In the absence of a PARTITION BY, ordering of `self.expr` is global: - eq_properties.add_new_orderings([vec![fn_res_ordering]]); + eq_properties + .add_new_orderings([LexOrdering::new(vec![fn_res_ordering])]); } else { // If we have a PARTITION BY, built-in functions can not introduce // a global ordering unless the existing ordering is compatible @@ -117,8 +118,8 @@ impl WindowExpr for BuiltInWindowExpr { &self.partition_by } - fn order_by(&self) -> &[PhysicalSortExpr] { - &self.order_by + fn order_by(&self) -> LexOrderingRef { + self.order_by.as_ref() } fn evaluate(&self, batch: &RecordBatch) -> Result { @@ -266,7 +267,7 @@ impl WindowExpr for BuiltInWindowExpr { Arc::new(BuiltInWindowExpr::new( reverse_expr, &self.partition_by.clone(), - &reverse_order_bys(&self.order_by), + reverse_order_bys(self.order_by.as_ref()).as_ref(), Arc::new(self.window_frame.reverse()), )) as _ }) diff --git a/datafusion/physical-expr/src/window/sliding_aggregate.rs 
b/datafusion/physical-expr/src/window/sliding_aggregate.rs index b889ec8c5d98..1e46baae7b0a 100644 --- a/datafusion/physical-expr/src/window/sliding_aggregate.rs +++ b/datafusion/physical-expr/src/window/sliding_aggregate.rs @@ -25,15 +25,15 @@ use arrow::array::{Array, ArrayRef}; use arrow::datatypes::Field; use arrow::record_batch::RecordBatch; -use datafusion_common::{Result, ScalarValue}; -use datafusion_expr::{Accumulator, WindowFrame}; - use crate::aggregate::AggregateFunctionExpr; use crate::window::window_expr::AggregateWindowExpr; use crate::window::{ PartitionBatches, PartitionWindowAggStates, PlainAggregateWindowExpr, WindowExpr, }; use crate::{expressions::PhysicalSortExpr, reverse_order_bys, PhysicalExpr}; +use datafusion_common::{Result, ScalarValue}; +use datafusion_expr::{Accumulator, WindowFrame}; +use datafusion_physical_expr_common::sort_expr::{LexOrdering, LexOrderingRef}; /// A window expr that takes the form of an aggregate function that /// can be incrementally computed over sliding windows. 
@@ -43,7 +43,7 @@ use crate::{expressions::PhysicalSortExpr, reverse_order_bys, PhysicalExpr}; pub struct SlidingAggregateWindowExpr { aggregate: Arc, partition_by: Vec>, - order_by: Vec, + order_by: LexOrdering, window_frame: Arc, } @@ -52,13 +52,13 @@ impl SlidingAggregateWindowExpr { pub fn new( aggregate: Arc, partition_by: &[Arc], - order_by: &[PhysicalSortExpr], + order_by: LexOrderingRef, window_frame: Arc, ) -> Self { Self { aggregate, partition_by: partition_by.to_vec(), - order_by: order_by.to_vec(), + order_by: LexOrdering::from_ref(order_by), window_frame, } } @@ -108,8 +108,8 @@ impl WindowExpr for SlidingAggregateWindowExpr { &self.partition_by } - fn order_by(&self) -> &[PhysicalSortExpr] { - &self.order_by + fn order_by(&self) -> LexOrderingRef { + self.order_by.as_ref() } fn get_window_frame(&self) -> &Arc { @@ -123,14 +123,14 @@ impl WindowExpr for SlidingAggregateWindowExpr { Arc::new(PlainAggregateWindowExpr::new( Arc::new(reverse_expr), &self.partition_by.clone(), - &reverse_order_bys(&self.order_by), + reverse_order_bys(self.order_by.as_ref()).as_ref(), Arc::new(self.window_frame.reverse()), )) as _ } else { Arc::new(SlidingAggregateWindowExpr::new( Arc::new(reverse_expr), &self.partition_by.clone(), - &reverse_order_bys(&self.order_by), + reverse_order_bys(self.order_by.as_ref()).as_ref(), Arc::new(self.window_frame.reverse()), )) as _ } @@ -157,7 +157,7 @@ impl WindowExpr for SlidingAggregateWindowExpr { expr: new_expr, options: req.options, }) - .collect::>(); + .collect::(); Some(Arc::new(SlidingAggregateWindowExpr { aggregate: self .aggregate diff --git a/datafusion/physical-expr/src/window/window_expr.rs b/datafusion/physical-expr/src/window/window_expr.rs index 46c46fab68c5..0f882def4433 100644 --- a/datafusion/physical-expr/src/window/window_expr.rs +++ b/datafusion/physical-expr/src/window/window_expr.rs @@ -20,7 +20,7 @@ use std::fmt::Debug; use std::ops::Range; use std::sync::Arc; -use crate::{LexOrderingRef, PhysicalExpr, 
PhysicalSortExpr}; +use crate::{LexOrderingRef, PhysicalExpr}; use arrow::array::{new_empty_array, Array, ArrayRef}; use arrow::compute::kernels::sort::SortColumn; @@ -109,7 +109,7 @@ pub trait WindowExpr: Send + Sync + Debug { fn partition_by(&self) -> &[Arc]; /// Expressions that's from the window function's order by clause, empty if absent - fn order_by(&self) -> &[PhysicalSortExpr]; + fn order_by(&self) -> LexOrderingRef; /// Get order by columns, empty if absent fn order_by_columns(&self, batch: &RecordBatch) -> Result> { diff --git a/datafusion/physical-optimizer/src/topk_aggregation.rs b/datafusion/physical-optimizer/src/topk_aggregation.rs index c8a28ed0ec0b..0e5fb82d9e93 100644 --- a/datafusion/physical-optimizer/src/topk_aggregation.rs +++ b/datafusion/physical-optimizer/src/topk_aggregation.rs @@ -25,6 +25,7 @@ use datafusion_common::config::ConfigOptions; use datafusion_common::tree_node::{Transformed, TransformedResult, TreeNode}; use datafusion_common::Result; use datafusion_physical_expr::expressions::Column; +use datafusion_physical_expr::LexOrdering; use datafusion_physical_plan::aggregates::AggregateExec; use datafusion_physical_plan::execution_plan::CardinalityEffect; use datafusion_physical_plan::projection::ProjectionExec; @@ -126,7 +127,7 @@ impl TopKAggregation { Ok(Transformed::no(plan)) }; let child = Arc::clone(child).transform_down(closure).data().ok()?; - let sort = SortExec::new(sort.expr().to_vec(), child) + let sort = SortExec::new(LexOrdering::new(sort.expr().to_vec()), child) .with_fetch(sort.fetch()) .with_preserve_partitioning(sort.preserve_partitioning()); Some(Arc::new(sort)) diff --git a/datafusion/physical-plan/Cargo.toml b/datafusion/physical-plan/Cargo.toml index 7fcd719539ec..a9f9b22fafda 100644 --- a/datafusion/physical-plan/Cargo.toml +++ b/datafusion/physical-plan/Cargo.toml @@ -68,6 +68,7 @@ rand = { workspace = true } tokio = { workspace = true } [dev-dependencies] +criterion = { version = "0.5", features = 
["async_futures"] } datafusion-functions-aggregate = { workspace = true } rstest = { workspace = true } rstest_reuse = "0.7.0" @@ -76,3 +77,7 @@ tokio = { workspace = true, features = [ "fs", "parking_lot", ] } + +[[bench]] +harness = false +name = "spm" diff --git a/datafusion/physical-plan/benches/spm.rs b/datafusion/physical-plan/benches/spm.rs new file mode 100644 index 000000000000..fbbd27409173 --- /dev/null +++ b/datafusion/physical-plan/benches/spm.rs @@ -0,0 +1,146 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +use std::sync::Arc; + +use arrow::record_batch::RecordBatch; +use arrow_array::{ArrayRef, Int32Array, Int64Array, StringArray}; +use datafusion_execution::TaskContext; +use datafusion_physical_expr::expressions::col; +use datafusion_physical_expr::PhysicalSortExpr; +use datafusion_physical_expr_common::sort_expr::LexOrdering; +use datafusion_physical_plan::memory::MemoryExec; +use datafusion_physical_plan::sorts::sort_preserving_merge::SortPreservingMergeExec; +use datafusion_physical_plan::{collect, ExecutionPlan}; + +use criterion::async_executor::FuturesExecutor; +use criterion::{black_box, criterion_group, criterion_main, Criterion}; + +fn generate_spm_for_round_robin_tie_breaker( + has_same_value: bool, + enable_round_robin_repartition: bool, + batch_count: usize, + partition_count: usize, +) -> SortPreservingMergeExec { + let row_size = 256; + let rb = if has_same_value { + let a: ArrayRef = Arc::new(Int32Array::from(vec![1; row_size])); + let b: ArrayRef = Arc::new(StringArray::from_iter(vec![Some("a"); row_size])); + let c: ArrayRef = Arc::new(Int64Array::from_iter(vec![0; row_size])); + RecordBatch::try_from_iter(vec![("a", a), ("b", b), ("c", c)]).unwrap() + } else { + let v = (0i32..row_size as i32).collect::>(); + let a: ArrayRef = Arc::new(Int32Array::from(v)); + + // Use alphanumeric characters + let charset: Vec = + "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789" + .chars() + .collect(); + + let mut strings = Vec::new(); + for i in 0..256 { + let mut s = String::new(); + s.push(charset[i % charset.len()]); + s.push(charset[(i / charset.len()) % charset.len()]); + strings.push(Some(s)); + } + + let b: ArrayRef = Arc::new(StringArray::from_iter(strings)); + + let v = (0i64..row_size as i64).collect::>(); + let c: ArrayRef = Arc::new(Int64Array::from_iter(v)); + RecordBatch::try_from_iter(vec![("a", a), ("b", b), ("c", c)]).unwrap() + }; + + let rbs = (0..batch_count).map(|_| rb.clone()).collect::>(); + let partitiones = 
vec![rbs.clone(); partition_count]; + + let schema = rb.schema(); + let sort = LexOrdering::new(vec![ + PhysicalSortExpr { + expr: col("b", &schema).unwrap(), + options: Default::default(), + }, + PhysicalSortExpr { + expr: col("c", &schema).unwrap(), + options: Default::default(), + }, + ]); + + let exec = MemoryExec::try_new(&partitiones, schema, None).unwrap(); + SortPreservingMergeExec::new(sort, Arc::new(exec)) + .with_round_robin_repartition(enable_round_robin_repartition) +} + +fn run_bench( + c: &mut Criterion, + has_same_value: bool, + enable_round_robin_repartition: bool, + batch_count: usize, + partition_count: usize, + description: &str, +) { + let task_ctx = TaskContext::default(); + let task_ctx = Arc::new(task_ctx); + + let spm = Arc::new(generate_spm_for_round_robin_tie_breaker( + has_same_value, + enable_round_robin_repartition, + batch_count, + partition_count, + )) as Arc; + + c.bench_function(description, |b| { + b.to_async(FuturesExecutor) + .iter(|| black_box(collect(Arc::clone(&spm), Arc::clone(&task_ctx)))) + }); +} + +fn criterion_benchmark(c: &mut Criterion) { + let params = [ + (true, false, "low_card_without_tiebreaker"), // low cardinality, no tie breaker + (true, true, "low_card_with_tiebreaker"), // low cardinality, with tie breaker + (false, false, "high_card_without_tiebreaker"), // high cardinality, no tie breaker + (false, true, "high_card_with_tiebreaker"), // high cardinality, with tie breaker + ]; + + let batch_counts = [1, 25, 625]; + let partition_counts = [2, 8, 32]; + + for &(has_same_value, enable_round_robin_repartition, cardinality_label) in ¶ms { + for &batch_count in &batch_counts { + for &partition_count in &partition_counts { + let description = format!( + "{}_batch_count_{}_partition_count_{}", + cardinality_label, batch_count, partition_count + ); + run_bench( + c, + has_same_value, + enable_round_robin_repartition, + batch_count, + partition_count, + &description, + ); + } + } + } +} + +criterion_group!(benches, 
criterion_benchmark); +criterion_main!(benches); diff --git a/datafusion/physical-plan/src/aggregates/mod.rs b/datafusion/physical-plan/src/aggregates/mod.rs index 48a03af19dbd..5ffe797c5c26 100644 --- a/datafusion/physical-plan/src/aggregates/mod.rs +++ b/datafusion/physical-plan/src/aggregates/mod.rs @@ -344,7 +344,7 @@ impl From for SendableRecordBatchStream { } /// Hash aggregate execution plan -#[derive(Debug)] +#[derive(Debug, Clone)] pub struct AggregateExec { /// Aggregation mode (full, partial) mode: AggregateMode, @@ -937,10 +937,10 @@ fn get_aggregate_expr_req( // necessary, or the aggregation is performing a "second stage" calculation, // then ignore the ordering requirement. if !aggr_expr.order_sensitivity().hard_requires() || !agg_mode.is_first_stage() { - return vec![]; + return LexOrdering::default(); } - let mut req = aggr_expr.order_bys().unwrap_or_default().to_vec(); + let mut req = LexOrdering::from_ref(aggr_expr.order_bys().unwrap_or_default()); // In non-first stage modes, we accumulate data (using `merge_batch`) from // different partitions (i.e. merge partial results). During this merge, we @@ -983,7 +983,7 @@ fn finer_ordering( agg_mode: &AggregateMode, ) -> Option { let aggr_req = get_aggregate_expr_req(aggr_expr, group_by, agg_mode); - eq_properties.get_finer_ordering(existing_req, &aggr_req) + eq_properties.get_finer_ordering(existing_req.as_ref(), aggr_req.as_ref()) } /// Concatenates the given slices. 
@@ -1014,12 +1014,12 @@ pub fn get_finer_aggregate_exprs_requirement( eq_properties: &EquivalenceProperties, agg_mode: &AggregateMode, ) -> Result { - let mut requirement = vec![]; + let mut requirement = LexOrdering::default(); for aggr_expr in aggr_exprs.iter_mut() { if let Some(finer_ordering) = finer_ordering(&requirement, aggr_expr, group_by, eq_properties, agg_mode) { - if eq_properties.ordering_satisfy(&finer_ordering) { + if eq_properties.ordering_satisfy(finer_ordering.as_ref()) { // Requirement is satisfied by existing ordering requirement = finer_ordering; continue; @@ -1033,7 +1033,7 @@ pub fn get_finer_aggregate_exprs_requirement( eq_properties, agg_mode, ) { - if eq_properties.ordering_satisfy(&finer_ordering) { + if eq_properties.ordering_satisfy(finer_ordering.as_ref()) { // Reverse requirement is satisfied by exiting ordering. // Hence reverse the aggregator requirement = finer_ordering; @@ -1074,7 +1074,9 @@ pub fn get_finer_aggregate_exprs_requirement( ); } - Ok(PhysicalSortRequirement::from_sort_exprs(&requirement)) + Ok(PhysicalSortRequirement::from_sort_exprs( + requirement.inner.iter(), + )) } /// Returns physical expressions for arguments to evaluate against a batch. 
@@ -2088,7 +2090,7 @@ mod tests { let args = [col("b", schema)?]; AggregateExprBuilder::new(first_value_udaf(), args.to_vec()) - .order_by(ordering_req.to_vec()) + .order_by(LexOrdering::new(ordering_req.to_vec())) .schema(Arc::new(schema.clone())) .alias(String::from("first_value(b) ORDER BY [b ASC NULLS LAST]")) .build() @@ -2106,7 +2108,7 @@ mod tests { }]; let args = [col("b", schema)?]; AggregateExprBuilder::new(last_value_udaf(), args.to_vec()) - .order_by(ordering_req.to_vec()) + .order_by(LexOrdering::new(ordering_req.to_vec())) .schema(Arc::new(schema.clone())) .alias(String::from("last_value(b) ORDER BY [b ASC NULLS LAST]")) .build() @@ -2272,7 +2274,7 @@ mod tests { ]), ]; - let common_requirement = vec![ + let common_requirement = LexOrdering::new(vec![ PhysicalSortExpr { expr: Arc::clone(col_a), options: options1, @@ -2281,14 +2283,14 @@ mod tests { expr: Arc::clone(col_c), options: options1, }, - ]; + ]); let mut aggr_exprs = order_by_exprs .into_iter() .map(|order_by_expr| { let ordering_req = order_by_expr.unwrap_or_default(); AggregateExprBuilder::new(array_agg_udaf(), vec![Arc::clone(col_a)]) .alias("a") - .order_by(ordering_req.to_vec()) + .order_by(LexOrdering::new(ordering_req.to_vec())) .schema(Arc::clone(&test_schema)) .build() .map(Arc::new) diff --git a/datafusion/physical-plan/src/aggregates/order/mod.rs b/datafusion/physical-plan/src/aggregates/order/mod.rs index accb2fda1131..24846d239591 100644 --- a/datafusion/physical-plan/src/aggregates/order/mod.rs +++ b/datafusion/physical-plan/src/aggregates/order/mod.rs @@ -19,7 +19,7 @@ use arrow_array::ArrayRef; use arrow_schema::Schema; use datafusion_common::Result; use datafusion_expr::EmitTo; -use datafusion_physical_expr::PhysicalSortExpr; +use datafusion_physical_expr_common::sort_expr::LexOrderingRef; use std::mem::size_of; mod full; @@ -45,7 +45,7 @@ impl GroupOrdering { pub fn try_new( input_schema: &Schema, mode: &InputOrderMode, - ordering: &[PhysicalSortExpr], + ordering: 
LexOrderingRef, ) -> Result { match mode { InputOrderMode::Linear => Ok(GroupOrdering::None), diff --git a/datafusion/physical-plan/src/aggregates/order/partial.rs b/datafusion/physical-plan/src/aggregates/order/partial.rs index 2dd1ea8a5449..5cc55dc0d028 100644 --- a/datafusion/physical-plan/src/aggregates/order/partial.rs +++ b/datafusion/physical-plan/src/aggregates/order/partial.rs @@ -21,7 +21,7 @@ use arrow_schema::Schema; use datafusion_common::Result; use datafusion_execution::memory_pool::proxy::VecAllocExt; use datafusion_expr::EmitTo; -use datafusion_physical_expr::PhysicalSortExpr; +use datafusion_physical_expr_common::sort_expr::LexOrderingRef; use std::mem::size_of; use std::sync::Arc; @@ -107,7 +107,7 @@ impl GroupOrderingPartial { pub fn try_new( input_schema: &Schema, order_indices: &[usize], - ordering: &[PhysicalSortExpr], + ordering: LexOrderingRef, ) -> Result { assert!(!order_indices.is_empty()); assert!(order_indices.len() <= ordering.len()); diff --git a/datafusion/physical-plan/src/aggregates/row_hash.rs b/datafusion/physical-plan/src/aggregates/row_hash.rs index 7d21cc2f1944..fe05f7375ed3 100644 --- a/datafusion/physical-plan/src/aggregates/row_hash.rs +++ b/datafusion/physical-plan/src/aggregates/row_hash.rs @@ -48,14 +48,14 @@ use datafusion_expr::{EmitTo, GroupsAccumulator}; use datafusion_physical_expr::expressions::Column; use datafusion_physical_expr::{GroupsAccumulatorAdapter, PhysicalSortExpr}; +use super::order::GroupOrdering; +use super::AggregateExec; use datafusion_physical_expr::aggregate::AggregateFunctionExpr; +use datafusion_physical_expr_common::sort_expr::LexOrdering; use futures::ready; use futures::stream::{Stream, StreamExt}; use log::debug; -use super::order::GroupOrdering; -use super::AggregateExec; - #[derive(Debug, Clone)] /// This object tracks the aggregation phase (input/output) pub(crate) enum ExecutionState { @@ -80,7 +80,7 @@ struct SpillState { // the execution. 
// ======================================================================== /// Sorting expression for spilling batches - spill_expr: Vec, + spill_expr: LexOrdering, /// Schema for spilling batches spill_schema: SchemaRef, @@ -511,7 +511,7 @@ impl GroupedHashAggregateStream { let group_ordering = GroupOrdering::try_new( &group_schema, &agg.input_order_mode, - ordering.as_slice(), + ordering.as_ref(), )?; let group_values = new_group_values(group_schema)?; @@ -965,7 +965,7 @@ impl GroupedHashAggregateStream { /// Emit all rows, sort them, and store them on disk. fn spill(&mut self) -> Result<()> { let emit = self.emit(EmitTo::All, true)?; - let sorted = sort_batch(&emit, &self.spill_state.spill_expr, None)?; + let sorted = sort_batch(&emit, self.spill_state.spill_expr.as_ref(), None)?; let spillfile = self.runtime.disk_manager.create_tmp_file("HashAggSpill")?; // TODO: slice large `sorted` and write to multiple files in parallel spill_record_batch_by_size( @@ -1030,7 +1030,7 @@ impl GroupedHashAggregateStream { streams.push(Box::pin(RecordBatchStreamAdapter::new( Arc::clone(&schema), futures::stream::once(futures::future::lazy(move |_| { - sort_batch(&batch, &expr, None) + sort_batch(&batch, expr.as_ref(), None) })), ))); for spill in self.spill_state.spills.drain(..) 
{ @@ -1041,7 +1041,7 @@ impl GroupedHashAggregateStream { self.input = StreamingMergeBuilder::new() .with_streams(streams) .with_schema(schema) - .with_expressions(&self.spill_state.spill_expr) + .with_expressions(self.spill_state.spill_expr.as_ref()) .with_metrics(self.baseline_metrics.clone()) .with_batch_size(self.batch_size) .with_reservation(self.reservation.new_empty()) diff --git a/datafusion/physical-plan/src/coalesce_batches.rs b/datafusion/physical-plan/src/coalesce_batches.rs index 61fb3599f013..11678e7a4696 100644 --- a/datafusion/physical-plan/src/coalesce_batches.rs +++ b/datafusion/physical-plan/src/coalesce_batches.rs @@ -48,7 +48,7 @@ use futures::stream::{Stream, StreamExt}; /// reaches the `fetch` value. /// /// See [`BatchCoalescer`] for more information -#[derive(Debug)] +#[derive(Debug, Clone)] pub struct CoalesceBatchesExec { /// The input plan input: Arc, diff --git a/datafusion/physical-plan/src/coalesce_partitions.rs b/datafusion/physical-plan/src/coalesce_partitions.rs index f9d4ec6a1a34..3da101d6092f 100644 --- a/datafusion/physical-plan/src/coalesce_partitions.rs +++ b/datafusion/physical-plan/src/coalesce_partitions.rs @@ -36,7 +36,7 @@ use datafusion_execution::TaskContext; /// Merge execution plan executes partitions in parallel and combines them into a single /// partition. No guarantees are made about the order of the resulting partition. 
-#[derive(Debug)] +#[derive(Debug, Clone)] pub struct CoalescePartitionsExec { /// Input execution plan input: Arc, diff --git a/datafusion/physical-plan/src/display.rs b/datafusion/physical-plan/src/display.rs index e79b3c817bd1..9f3a76e28577 100644 --- a/datafusion/physical-plan/src/display.rs +++ b/datafusion/physical-plan/src/display.rs @@ -25,7 +25,7 @@ use arrow_schema::SchemaRef; use datafusion_common::display::{GraphvizBuilder, PlanType, StringifiedPlan}; use datafusion_expr::display_schema; -use datafusion_physical_expr::{LexOrdering, PhysicalSortExpr}; +use datafusion_physical_expr::LexOrdering; use super::{accept, ExecutionPlan, ExecutionPlanVisitor}; @@ -459,23 +459,6 @@ impl<'a> fmt::Display for ProjectSchemaDisplay<'a> { } } -/// A wrapper to customize output ordering display. -#[derive(Debug)] -pub struct OutputOrderingDisplay<'a>(pub &'a [PhysicalSortExpr]); - -impl<'a> fmt::Display for OutputOrderingDisplay<'a> { - fn fmt(&self, f: &mut Formatter) -> fmt::Result { - write!(f, "[")?; - for (i, e) in self.0.iter().enumerate() { - if i > 0 { - write!(f, ", ")? 
- } - write!(f, "{e}")?; - } - write!(f, "]") - } -} - pub fn display_orderings(f: &mut Formatter, orderings: &[LexOrdering]) -> fmt::Result { if let Some(ordering) = orderings.first() { if !ordering.is_empty() { @@ -489,8 +472,8 @@ pub fn display_orderings(f: &mut Formatter, orderings: &[LexOrdering]) -> fmt::R orderings.iter().enumerate().filter(|(_, o)| !o.is_empty()) { match idx { - 0 => write!(f, "{}", OutputOrderingDisplay(ordering))?, - _ => write!(f, ", {}", OutputOrderingDisplay(ordering))?, + 0 => write!(f, "[{}]", ordering)?, + _ => write!(f, ", [{}]", ordering)?, } } let end = if orderings.len() == 1 { "" } else { "]" }; diff --git a/datafusion/physical-plan/src/empty.rs b/datafusion/physical-plan/src/empty.rs index f6e0abb94fa8..192619f69f6a 100644 --- a/datafusion/physical-plan/src/empty.rs +++ b/datafusion/physical-plan/src/empty.rs @@ -35,7 +35,7 @@ use datafusion_physical_expr::EquivalenceProperties; use log::trace; /// Execution plan for empty relation with produce_one_row=false -#[derive(Debug)] +#[derive(Debug, Clone)] pub struct EmptyExec { /// The schema for the produced row schema: SchemaRef, diff --git a/datafusion/physical-plan/src/execution_plan.rs b/datafusion/physical-plan/src/execution_plan.rs index e6484452d43e..d65320dbab68 100644 --- a/datafusion/physical-plan/src/execution_plan.rs +++ b/datafusion/physical-plan/src/execution_plan.rs @@ -37,8 +37,8 @@ pub use datafusion_physical_expr::window::WindowExpr; pub use datafusion_physical_expr::{ expressions, udf, Distribution, Partitioning, PhysicalExpr, }; -use datafusion_physical_expr::{EquivalenceProperties, LexOrdering, PhysicalSortExpr}; -use datafusion_physical_expr_common::sort_expr::LexRequirement; +use datafusion_physical_expr::{EquivalenceProperties, LexOrdering}; +use datafusion_physical_expr_common::sort_expr::{LexOrderingRef, LexRequirement}; use crate::coalesce_partitions::CoalescePartitionsExec; use crate::display::DisplayableExecutionPlan; @@ -443,7 +443,7 @@ pub trait 
ExecutionPlanProperties { /// For example, `SortExec` (obviously) produces sorted output as does /// `SortPreservingMergeStream`. Less obviously, `Projection` produces sorted /// output if its input is sorted as it does not reorder the input rows. - fn output_ordering(&self) -> Option<&[PhysicalSortExpr]>; + fn output_ordering(&self) -> Option; /// Get the [`EquivalenceProperties`] within the plan. /// @@ -474,7 +474,7 @@ impl ExecutionPlanProperties for Arc { self.properties().execution_mode() } - fn output_ordering(&self) -> Option<&[PhysicalSortExpr]> { + fn output_ordering(&self) -> Option { self.properties().output_ordering() } @@ -492,7 +492,7 @@ impl ExecutionPlanProperties for &dyn ExecutionPlan { self.properties().execution_mode() } - fn output_ordering(&self) -> Option<&[PhysicalSortExpr]> { + fn output_ordering(&self) -> Option { self.properties().output_ordering() } @@ -643,7 +643,7 @@ impl PlanProperties { &self.partitioning } - pub fn output_ordering(&self) -> Option<&[PhysicalSortExpr]> { + pub fn output_ordering(&self) -> Option { self.output_ordering.as_deref() } diff --git a/datafusion/physical-plan/src/filter.rs b/datafusion/physical-plan/src/filter.rs index 30b0af19f43b..07898e8d22d8 100644 --- a/datafusion/physical-plan/src/filter.rs +++ b/datafusion/physical-plan/src/filter.rs @@ -54,7 +54,7 @@ use log::trace; /// FilterExec evaluates a boolean predicate against all input batches to determine which rows to /// include in its output batches. -#[derive(Debug)] +#[derive(Debug, Clone)] pub struct FilterExec { /// The expression to filter on. This expression must evaluate to a boolean value. predicate: Arc, @@ -371,7 +371,12 @@ impl ExecutionPlan for FilterExec { /// The output statistics of a filtering operation can be estimated if the /// predicate's selectivity value can be determined for the incoming data. 
fn statistics(&self) -> Result { - Self::statistics_helper(&self.input, self.predicate(), self.default_selectivity) + let stats = Self::statistics_helper( + &self.input, + self.predicate(), + self.default_selectivity, + )?; + Ok(stats.project(self.projection.as_ref())) } fn cardinality_effect(&self) -> CardinalityEffect { diff --git a/datafusion/physical-plan/src/insert.rs b/datafusion/physical-plan/src/insert.rs index 8b3ef5ae01e4..e478cecb7ffc 100644 --- a/datafusion/physical-plan/src/insert.rs +++ b/datafusion/physical-plan/src/insert.rs @@ -79,6 +79,7 @@ pub type FileSinkExec = DataSinkExec; /// Execution plan for writing record batches to a [`DataSink`] /// /// Returns a single row with the number of values written +#[derive(Clone)] pub struct DataSinkExec { /// Input plan that produces the record batches to be written. input: Arc, diff --git a/datafusion/physical-plan/src/joins/cross_join.rs b/datafusion/physical-plan/src/joins/cross_join.rs index 8f49885068fd..a67e1df47bc7 100644 --- a/datafusion/physical-plan/src/joins/cross_join.rs +++ b/datafusion/physical-plan/src/joins/cross_join.rs @@ -49,8 +49,13 @@ use futures::{ready, Stream, StreamExt, TryStreamExt}; /// Data of the left side type JoinLeftData = (RecordBatch, MemoryReservation); +#[allow(rustdoc::private_intra_doc_links)] /// executes partitions in parallel and combines them into a set of /// partitions by combining all values from the left with all values on the right +/// +/// Note that the `Clone` trait is not implemented for this struct due to the +/// `left_fut` [`OnceAsync`], which is used to coordinate the loading of the +/// left side with the processing in each output stream. 
#[derive(Debug)] pub struct CrossJoinExec { /// left (build) side which gets loaded in memory diff --git a/datafusion/physical-plan/src/joins/hash_join.rs b/datafusion/physical-plan/src/joins/hash_join.rs index 2d11e03814a3..ae872e13a9f6 100644 --- a/datafusion/physical-plan/src/joins/hash_join.rs +++ b/datafusion/physical-plan/src/joins/hash_join.rs @@ -136,6 +136,7 @@ impl JoinLeftData { } } +#[allow(rustdoc::private_intra_doc_links)] /// Join execution plan: Evaluates eqijoin predicates in parallel on multiple /// partitions using a hash table and an optional filter list to apply post /// join. @@ -293,6 +294,10 @@ impl JoinLeftData { /// │ "dimension" │ │ "fact" │ /// └───────────────┘ └───────────────┘ /// ``` +/// +/// Note that the `Clone` trait is not implemented for this struct due to the +/// `left_fut` [`OnceAsync`], which is used to coordinate the loading of the +/// left side with the processing in each output stream. #[derive(Debug)] pub struct HashJoinExec { /// left (build) side which gets hashed @@ -524,6 +529,7 @@ impl HashJoinExec { | JoinType::Full | JoinType::LeftAnti | JoinType::LeftSemi + | JoinType::LeftMark )); let mode = if pipeline_breaking { @@ -779,7 +785,7 @@ impl ExecutionPlan for HashJoinExec { // TODO stats: it is not possible in general to know the output size of joins // There are some special cases though, for example: // - `A LEFT JOIN B ON A.col=B.col` with `COUNT_DISTINCT(B.col)=COUNT(B.col)` - let mut stats = estimate_join_statistics( + let stats = estimate_join_statistics( Arc::clone(&self.left), Arc::clone(&self.right), self.on.clone(), @@ -787,16 +793,7 @@ impl ExecutionPlan for HashJoinExec { &self.join_schema, )?; // Project statistics if there is a projection - if let Some(projection) = &self.projection { - stats.column_statistics = stats - .column_statistics - .into_iter() - .enumerate() - .filter(|(i, _)| projection.contains(i)) - .map(|(_, s)| s) - .collect(); - } - Ok(stats) + 
Ok(stats.project(self.projection.as_ref())) } } @@ -3091,6 +3088,94 @@ mod tests { Ok(()) } + #[apply(batch_sizes)] + #[tokio::test] + async fn join_left_mark(batch_size: usize) -> Result<()> { + let task_ctx = prepare_task_ctx(batch_size); + let left = build_table( + ("a1", &vec![1, 2, 3]), + ("b1", &vec![4, 5, 7]), // 7 does not exist on the right + ("c1", &vec![7, 8, 9]), + ); + let right = build_table( + ("a2", &vec![10, 20, 30]), + ("b1", &vec![4, 5, 6]), + ("c2", &vec![70, 80, 90]), + ); + let on = vec![( + Arc::new(Column::new_with_schema("b1", &left.schema())?) as _, + Arc::new(Column::new_with_schema("b1", &right.schema())?) as _, + )]; + + let (columns, batches) = join_collect( + Arc::clone(&left), + Arc::clone(&right), + on.clone(), + &JoinType::LeftMark, + false, + task_ctx, + ) + .await?; + assert_eq!(columns, vec!["a1", "b1", "c1", "mark"]); + + let expected = [ + "+----+----+----+-------+", + "| a1 | b1 | c1 | mark |", + "+----+----+----+-------+", + "| 1 | 4 | 7 | true |", + "| 2 | 5 | 8 | true |", + "| 3 | 7 | 9 | false |", + "+----+----+----+-------+", + ]; + assert_batches_sorted_eq!(expected, &batches); + + Ok(()) + } + + #[apply(batch_sizes)] + #[tokio::test] + async fn partitioned_join_left_mark(batch_size: usize) -> Result<()> { + let task_ctx = prepare_task_ctx(batch_size); + let left = build_table( + ("a1", &vec![1, 2, 3]), + ("b1", &vec![4, 5, 7]), // 7 does not exist on the right + ("c1", &vec![7, 8, 9]), + ); + let right = build_table( + ("a2", &vec![10, 20, 30, 40]), + ("b1", &vec![4, 4, 5, 6]), + ("c2", &vec![60, 70, 80, 90]), + ); + let on = vec![( + Arc::new(Column::new_with_schema("b1", &left.schema())?) as _, + Arc::new(Column::new_with_schema("b1", &right.schema())?) 
as _, + )]; + + let (columns, batches) = partitioned_join_collect( + Arc::clone(&left), + Arc::clone(&right), + on.clone(), + &JoinType::LeftMark, + false, + task_ctx, + ) + .await?; + assert_eq!(columns, vec!["a1", "b1", "c1", "mark"]); + + let expected = [ + "+----+----+----+-------+", + "| a1 | b1 | c1 | mark |", + "+----+----+----+-------+", + "| 1 | 4 | 7 | true |", + "| 2 | 5 | 8 | true |", + "| 3 | 7 | 9 | false |", + "+----+----+----+-------+", + ]; + assert_batches_sorted_eq!(expected, &batches); + + Ok(()) + } + #[test] fn join_with_hash_collision() -> Result<()> { let mut hashmap_left = RawTable::with_capacity(2); @@ -3476,6 +3561,15 @@ mod tests { "| 30 | 6 | 90 |", "+----+----+----+", ]; + let expected_left_mark = vec![ + "+----+----+----+-------+", + "| a1 | b1 | c1 | mark |", + "+----+----+----+-------+", + "| 1 | 4 | 7 | true |", + "| 2 | 5 | 8 | true |", + "| 3 | 7 | 9 | false |", + "+----+----+----+-------+", + ]; let test_cases = vec![ (JoinType::Inner, expected_inner), @@ -3486,6 +3580,7 @@ mod tests { (JoinType::LeftAnti, expected_left_anti), (JoinType::RightSemi, expected_right_semi), (JoinType::RightAnti, expected_right_anti), + (JoinType::LeftMark, expected_left_mark), ]; for (join_type, expected) in test_cases { @@ -3768,6 +3863,7 @@ mod tests { JoinType::LeftAnti, JoinType::RightSemi, JoinType::RightAnti, + JoinType::LeftMark, ]; for join_type in join_types { diff --git a/datafusion/physical-plan/src/joins/nested_loop_join.rs b/datafusion/physical-plan/src/joins/nested_loop_join.rs index 358ff02473a6..f36c2395e20f 100644 --- a/datafusion/physical-plan/src/joins/nested_loop_join.rs +++ b/datafusion/physical-plan/src/joins/nested_loop_join.rs @@ -105,6 +105,7 @@ impl JoinLeftData { } } +#[allow(rustdoc::private_intra_doc_links)] /// NestedLoopJoinExec is build-probe join operator, whose main task is to /// perform joins without any equijoin conditions in `ON` clause. 
/// @@ -140,6 +141,9 @@ impl JoinLeftData { /// "reports" about probe phase completion (which means that "visited" bitmap won't be /// updated anymore), and only the last thread, reporting about completion, will return output. /// +/// Note that the `Clone` trait is not implemented for this struct due to the +/// `left_fut` [`OnceAsync`], which is used to coordinate the loading of the +/// left side with the processing in each output stream. #[derive(Debug)] pub struct NestedLoopJoinExec { /// left side @@ -858,7 +862,7 @@ pub(crate) mod tests { use datafusion_expr::Operator; use datafusion_physical_expr::expressions::{BinaryExpr, Literal}; use datafusion_physical_expr::{Partitioning, PhysicalExpr}; - use datafusion_physical_expr_common::sort_expr::PhysicalSortExpr; + use datafusion_physical_expr_common::sort_expr::{LexOrdering, PhysicalSortExpr}; use rstest::rstest; @@ -888,7 +892,7 @@ pub(crate) mod tests { let mut exec = MemoryExec::try_new(&[batches], Arc::clone(&schema), None).unwrap(); if !sorted_column_names.is_empty() { - let mut sort_info = Vec::new(); + let mut sort_info = LexOrdering::default(); for name in sorted_column_names { let index = schema.index_of(name).unwrap(); let sort_expr = PhysicalSortExpr { @@ -1244,6 +1248,37 @@ pub(crate) mod tests { Ok(()) } + #[tokio::test] + async fn join_left_mark_with_filter() -> Result<()> { + let task_ctx = Arc::new(TaskContext::default()); + let left = build_left_table(); + let right = build_right_table(); + + let filter = prepare_join_filter(); + let (columns, batches) = multi_partitioned_join_collect( + left, + right, + &JoinType::LeftMark, + Some(filter), + task_ctx, + ) + .await?; + assert_eq!(columns, vec!["a1", "b1", "c1", "mark"]); + let expected = [ + "+----+----+-----+-------+", + "| a1 | b1 | c1 | mark |", + "+----+----+-----+-------+", + "| 11 | 8 | 110 | false |", + "| 5 | 5 | 50 | true |", + "| 9 | 8 | 90 | false |", + "+----+----+-----+-------+", + ]; + + assert_batches_sorted_eq!(expected, 
&batches); + + Ok(()) + } + #[tokio::test] async fn test_overallocation() -> Result<()> { let left = build_table( @@ -1269,6 +1304,7 @@ pub(crate) mod tests { JoinType::Full, JoinType::LeftSemi, JoinType::LeftAnti, + JoinType::LeftMark, JoinType::RightSemi, JoinType::RightAnti, ]; diff --git a/datafusion/physical-plan/src/joins/sort_merge_join.rs b/datafusion/physical-plan/src/joins/sort_merge_join.rs index b299b495c504..3ad892c880f6 100644 --- a/datafusion/physical-plan/src/joins/sort_merge_join.rs +++ b/datafusion/physical-plan/src/joins/sort_merge_join.rs @@ -35,7 +35,9 @@ use std::sync::Arc; use std::task::{Context, Poll}; use arrow::array::*; -use arrow::compute::{self, concat_batches, filter_record_batch, take, SortOptions}; +use arrow::compute::{ + self, concat_batches, filter_record_batch, is_not_null, take, SortOptions, +}; use arrow::datatypes::{DataType, SchemaRef, TimeUnit}; use arrow::error::ArrowError; use arrow::ipc::reader::FileReader; @@ -50,7 +52,7 @@ use datafusion_execution::runtime_env::RuntimeEnv; use datafusion_execution::TaskContext; use datafusion_physical_expr::equivalence::join_equivalence_properties; use datafusion_physical_expr::{PhysicalExprRef, PhysicalSortRequirement}; -use datafusion_physical_expr_common::sort_expr::LexRequirement; +use datafusion_physical_expr_common::sort_expr::{LexOrdering, LexRequirement}; use futures::{Stream, StreamExt}; use hashbrown::HashSet; @@ -69,7 +71,7 @@ use crate::{ /// join execution plan executes partitions in parallel and combines them into a set of /// partitions. 
-#[derive(Debug)] +#[derive(Debug, Clone)] pub struct SortMergeJoinExec { /// Left sorted joining execution plan pub left: Arc, @@ -86,9 +88,9 @@ pub struct SortMergeJoinExec { /// Execution metrics metrics: ExecutionPlanMetricsSet, /// The left SortExpr - left_sort_exprs: Vec, + left_sort_exprs: LexOrdering, /// The right SortExpr - right_sort_exprs: Vec, + right_sort_exprs: LexOrdering, /// Sort options of join columns used in sorting left and right execution plans pub sort_options: Vec, /// If null_equals_null is true, null == null else null != null @@ -157,8 +159,8 @@ impl SortMergeJoinExec { join_type, schema, metrics: ExecutionPlanMetricsSet::new(), - left_sort_exprs, - right_sort_exprs, + left_sort_exprs: LexOrdering::new(left_sort_exprs), + right_sort_exprs: LexOrdering::new(right_sort_exprs), sort_options, null_equals_null, cache, @@ -178,7 +180,8 @@ impl SortMergeJoinExec { | JoinType::Left | JoinType::Full | JoinType::LeftAnti - | JoinType::LeftSemi => JoinSide::Left, + | JoinType::LeftSemi + | JoinType::LeftMark => JoinSide::Left, } } @@ -186,7 +189,10 @@ impl SortMergeJoinExec { fn maintains_input_order(join_type: JoinType) -> Vec { match join_type { JoinType::Inner => vec![true, false], - JoinType::Left | JoinType::LeftSemi | JoinType::LeftAnti => vec![true, false], + JoinType::Left + | JoinType::LeftSemi + | JoinType::LeftAnti + | JoinType::LeftMark => vec![true, false], JoinType::Right | JoinType::RightSemi | JoinType::RightAnti => { vec![false, true] } @@ -293,10 +299,10 @@ impl ExecutionPlan for SortMergeJoinExec { fn required_input_ordering(&self) -> Vec> { vec![ Some(PhysicalSortRequirement::from_sort_exprs( - &self.left_sort_exprs, + self.left_sort_exprs.iter(), )), Some(PhysicalSortRequirement::from_sort_exprs( - &self.right_sort_exprs, + self.right_sort_exprs.iter(), )), ] } @@ -784,6 +790,29 @@ fn get_corrected_filter_mask( corrected_mask.extend(vec![Some(false); null_matched]); Some(corrected_mask.finish()) } + JoinType::LeftMark => { + for 
i in 0..row_indices_length { + let last_index = + last_index_for_row(i, row_indices, batch_ids, row_indices_length); + if filter_mask.value(i) && !seen_true { + seen_true = true; + corrected_mask.append_value(true); + } else if seen_true || !filter_mask.value(i) && !last_index { + corrected_mask.append_null(); // to be ignored and not set to output + } else { + corrected_mask.append_value(false); // to be converted to null joined row + } + + if last_index { + seen_true = false; + } + } + + // Generate null joined rows for records which have no matching join key + let null_matched = expected_size - corrected_mask.len(); + corrected_mask.extend(vec![Some(false); null_matched]); + Some(corrected_mask.finish()) + } JoinType::LeftSemi => { for i in 0..row_indices_length { let last_index = @@ -860,6 +889,7 @@ impl Stream for SMJStream { self.join_type, JoinType::Left | JoinType::LeftSemi + | JoinType::LeftMark | JoinType::Right | JoinType::LeftAnti ) @@ -943,6 +973,7 @@ impl Stream for SMJStream { | JoinType::LeftSemi | JoinType::Right | JoinType::LeftAnti + | JoinType::LeftMark ) { continue; @@ -964,6 +995,7 @@ impl Stream for SMJStream { | JoinType::LeftSemi | JoinType::Right | JoinType::LeftAnti + | JoinType::LeftMark ) { let out = self.filter_joined_batch()?; @@ -1264,6 +1296,8 @@ impl SMJStream { let mut join_streamed = false; // Whether to join buffered rows let mut join_buffered = false; + // For Mark join we store a dummy id to indicate that the row has a match + let mut mark_row_as_match = false; // determine whether we need to join streamed/buffered rows match self.current_ordering { @@ -1275,12 +1309,14 @@ impl SMJStream { | JoinType::RightSemi | JoinType::Full | JoinType::LeftAnti + | JoinType::LeftMark ) { join_streamed = !self.streamed_joined; } } Ordering::Equal => { - if matches!(self.join_type, JoinType::LeftSemi) { + if matches!(self.join_type, JoinType::LeftSemi | JoinType::LeftMark) { + mark_row_as_match = matches!(self.join_type, JoinType::LeftMark); 
// if the join filter is specified then its needed to output the streamed index // only if it has not been emitted before // the `join_filter_matched_idxs` keeps track on if streamed index has a successful @@ -1357,9 +1393,11 @@ impl SMJStream { } else { Some(self.buffered_data.scanning_batch_idx) }; + // For Mark join we store a dummy id to indicate that the row has a match + let scanning_idx = mark_row_as_match.then_some(0); self.streamed_batch - .append_output_pair(scanning_batch_idx, None); + .append_output_pair(scanning_batch_idx, scanning_idx); self.output_size += 1; self.buffered_data.scanning_finish(); self.streamed_joined = true; @@ -1461,24 +1499,25 @@ impl SMJStream { // The row indices of joined buffered batch let buffered_indices: UInt64Array = chunk.buffered_indices.finish(); - let mut buffered_columns = - if matches!(self.join_type, JoinType::LeftSemi | JoinType::LeftAnti) { - vec![] - } else if let Some(buffered_idx) = chunk.buffered_batch_idx { - get_buffered_columns( - &self.buffered_data, - buffered_idx, - &buffered_indices, - )? - } else { - // If buffered batch none, meaning it is null joined batch. - // We need to create null arrays for buffered columns to join with streamed rows. - self.buffered_schema - .fields() - .iter() - .map(|f| new_null_array(f.data_type(), buffered_indices.len())) - .collect::>() - }; + let mut buffered_columns = if matches!(self.join_type, JoinType::LeftMark) { + vec![Arc::new(is_not_null(&buffered_indices)?) as ArrayRef] + } else if matches!(self.join_type, JoinType::LeftSemi | JoinType::LeftAnti) { + vec![] + } else if let Some(buffered_idx) = chunk.buffered_batch_idx { + get_buffered_columns( + &self.buffered_data, + buffered_idx, + &buffered_indices, + )? + } else { + // If buffered batch none, meaning it is null joined batch. + // We need to create null arrays for buffered columns to join with streamed rows. 
+ create_unmatched_columns( + self.join_type, + &self.buffered_schema, + buffered_indices.len(), + ) + }; let streamed_columns_length = streamed_columns.len(); @@ -1489,7 +1528,7 @@ impl SMJStream { get_filter_column(&self.filter, &buffered_columns, &streamed_columns) } else if matches!( self.join_type, - JoinType::LeftSemi | JoinType::LeftAnti + JoinType::LeftSemi | JoinType::LeftAnti | JoinType::LeftMark ) { // unwrap is safe here as we check is_some on top of if statement let buffered_columns = get_buffered_columns( @@ -1517,7 +1556,6 @@ impl SMJStream { }; let output_batch = RecordBatch::try_new(Arc::clone(&self.schema), columns)?; - // Apply join filter if any if !filter_columns.is_empty() { if let Some(f) = &self.filter { @@ -1553,6 +1591,7 @@ impl SMJStream { | JoinType::LeftSemi | JoinType::Right | JoinType::LeftAnti + | JoinType::LeftMark ) { self.output_record_batches .batches @@ -1691,6 +1730,7 @@ impl SMJStream { | JoinType::LeftSemi | JoinType::Right | JoinType::LeftAnti + | JoinType::LeftMark )) { self.output_record_batches.batches.clear(); @@ -1721,16 +1761,18 @@ impl SMJStream { let buffered_columns_length = self.buffered_schema.fields.len(); let streamed_columns_length = self.streamed_schema.fields.len(); - if matches!(self.join_type, JoinType::Left | JoinType::Right) { + if matches!( + self.join_type, + JoinType::Left | JoinType::LeftMark | JoinType::Right + ) { let null_mask = compute::not(corrected_mask)?; let null_joined_batch = filter_record_batch(&record_batch, &null_mask)?; - let mut buffered_columns = self - .buffered_schema - .fields() - .iter() - .map(|f| new_null_array(f.data_type(), null_joined_batch.num_rows())) - .collect::>(); + let mut buffered_columns = create_unmatched_columns( + self.join_type, + &self.buffered_schema, + null_joined_batch.num_rows(), + ); let columns = if matches!(self.join_type, JoinType::Right) { let streamed_columns = null_joined_batch @@ -1777,6 +1819,22 @@ impl SMJStream { } } +fn create_unmatched_columns( + 
join_type: JoinType, + schema: &SchemaRef, + size: usize, +) -> Vec { + if matches!(join_type, JoinType::LeftMark) { + vec![Arc::new(BooleanArray::from(vec![false; size])) as ArrayRef] + } else { + schema + .fields() + .iter() + .map(|f| new_null_array(f.data_type(), size)) + .collect::>() + } +} + /// Gets the arrays which join filters are applied on. fn get_filter_column( join_filter: &Option, @@ -2716,6 +2774,39 @@ mod tests { Ok(()) } + #[tokio::test] + async fn join_left_mark() -> Result<()> { + let left = build_table( + ("a1", &vec![1, 2, 2, 3]), + ("b1", &vec![4, 5, 5, 7]), // 7 does not exist on the right + ("c1", &vec![7, 8, 8, 9]), + ); + let right = build_table( + ("a2", &vec![10, 20, 30, 40]), + ("b1", &vec![4, 4, 5, 6]), // 5 is double on the right + ("c2", &vec![60, 70, 80, 90]), + ); + let on = vec![( + Arc::new(Column::new_with_schema("b1", &left.schema())?) as _, + Arc::new(Column::new_with_schema("b1", &right.schema())?) as _, + )]; + + let (_, batches) = join_collect(left, right, on, LeftMark).await?; + let expected = [ + "+----+----+----+-------+", + "| a1 | b1 | c1 | mark |", + "+----+----+----+-------+", + "| 1 | 4 | 7 | true |", + "| 2 | 5 | 8 | true |", + "| 2 | 5 | 8 | true |", + "| 3 | 7 | 9 | false |", + "+----+----+----+-------+", + ]; + // The output order is important as SMJ preserves sortedness + assert_batches_eq!(expected, &batches); + Ok(()) + } + #[tokio::test] async fn join_with_duplicated_column_names() -> Result<()> { let left = build_table( @@ -3047,7 +3138,7 @@ mod tests { )]; let sort_options = vec![SortOptions::default(); on.len()]; - let join_types = vec![Inner, Left, Right, Full, LeftSemi, LeftAnti]; + let join_types = vec![Inner, Left, Right, Full, LeftSemi, LeftAnti, LeftMark]; // Disable DiskManager to prevent spilling let runtime = RuntimeEnvBuilder::new() @@ -3125,7 +3216,7 @@ mod tests { )]; let sort_options = vec![SortOptions::default(); on.len()]; - let join_types = vec![Inner, Left, Right, Full, LeftSemi, 
LeftAnti]; + let join_types = vec![Inner, Left, Right, Full, LeftSemi, LeftAnti, LeftMark]; // Disable DiskManager to prevent spilling let runtime = RuntimeEnvBuilder::new() @@ -3181,7 +3272,7 @@ mod tests { )]; let sort_options = vec![SortOptions::default(); on.len()]; - let join_types = [Inner, Left, Right, Full, LeftSemi, LeftAnti]; + let join_types = [Inner, Left, Right, Full, LeftSemi, LeftAnti, LeftMark]; // Enable DiskManager to allow spilling let runtime = RuntimeEnvBuilder::new() @@ -3282,7 +3373,7 @@ mod tests { )]; let sort_options = vec![SortOptions::default(); on.len()]; - let join_types = [Inner, Left, Right, Full, LeftSemi, LeftAnti]; + let join_types = [Inner, Left, Right, Full, LeftSemi, LeftAnti, LeftMark]; // Enable DiskManager to allow spilling let runtime = RuntimeEnvBuilder::new() diff --git a/datafusion/physical-plan/src/joins/stream_join_utils.rs b/datafusion/physical-plan/src/joins/stream_join_utils.rs index 02c71dab3df2..5ccdd9b40dee 100644 --- a/datafusion/physical-plan/src/joins/stream_join_utils.rs +++ b/datafusion/physical-plan/src/joins/stream_join_utils.rs @@ -40,6 +40,7 @@ use datafusion_physical_expr::intervals::cp_solver::ExprIntervalGraph; use datafusion_physical_expr::utils::collect_columns; use datafusion_physical_expr::{PhysicalExpr, PhysicalSortExpr}; +use datafusion_physical_expr_common::sort_expr::LexOrderingRef; use hashbrown::raw::RawTable; use hashbrown::HashSet; @@ -744,8 +745,8 @@ pub fn prepare_sorted_exprs( filter: &JoinFilter, left: &Arc, right: &Arc, - left_sort_exprs: &[PhysicalSortExpr], - right_sort_exprs: &[PhysicalSortExpr], + left_sort_exprs: LexOrderingRef, + right_sort_exprs: LexOrderingRef, ) -> Result<(SortedFilterExpr, SortedFilterExpr, ExprIntervalGraph)> { let err = || { datafusion_common::plan_datafusion_err!("Filter does not include the child order") diff --git a/datafusion/physical-plan/src/joins/symmetric_hash_join.rs b/datafusion/physical-plan/src/joins/symmetric_hash_join.rs index 
eb6a30d17e92..5b6dc2cd2ae9 100644 --- a/datafusion/physical-plan/src/joins/symmetric_hash_join.rs +++ b/datafusion/physical-plan/src/joins/symmetric_hash_join.rs @@ -48,7 +48,6 @@ use crate::joins::utils::{ }; use crate::{ execution_mode_from_children, - expressions::PhysicalSortExpr, joins::StreamJoinPartitionMode, metrics::{ExecutionPlanMetricsSet, MetricsSet}, DisplayAs, DisplayFormatType, Distribution, ExecutionPlan, ExecutionPlanProperties, @@ -62,6 +61,7 @@ use arrow::array::{ use arrow::compute::concat_batches; use arrow::datatypes::{Schema, SchemaRef}; use arrow::record_batch::RecordBatch; +use arrow_buffer::ArrowNativeType; use datafusion_common::hash_utils::create_hashes; use datafusion_common::utils::bisect; use datafusion_common::{internal_err, plan_err, JoinSide, JoinType, Result}; @@ -73,7 +73,9 @@ use datafusion_physical_expr::intervals::cp_solver::ExprIntervalGraph; use datafusion_physical_expr::{PhysicalExprRef, PhysicalSortRequirement}; use ahash::RandomState; -use datafusion_physical_expr_common::sort_expr::LexRequirement; +use datafusion_physical_expr_common::sort_expr::{ + LexOrdering, LexOrderingRef, LexRequirement, +}; use futures::{ready, Stream, StreamExt}; use hashbrown::HashSet; use parking_lot::Mutex; @@ -165,7 +167,7 @@ const HASHMAP_SHRINK_SCALE_FACTOR: usize = 4; /// making the smallest value in 'left_sorted' 1231 and any rows below (since ascending) /// than that can be dropped from the inner buffer. 
/// ``` -#[derive(Debug)] +#[derive(Debug, Clone)] pub struct SymmetricHashJoinExec { /// Left side stream pub(crate) left: Arc, @@ -186,9 +188,9 @@ pub struct SymmetricHashJoinExec { /// If null_equals_null is true, null == null else null != null pub(crate) null_equals_null: bool, /// Left side sort expression(s) - pub(crate) left_sort_exprs: Option>, + pub(crate) left_sort_exprs: Option, /// Right side sort expression(s) - pub(crate) right_sort_exprs: Option>, + pub(crate) right_sort_exprs: Option, /// Partition Mode mode: StreamJoinPartitionMode, /// Cache holding plan properties like equivalences, output partitioning etc. @@ -210,8 +212,8 @@ impl SymmetricHashJoinExec { filter: Option, join_type: &JoinType, null_equals_null: bool, - left_sort_exprs: Option>, - right_sort_exprs: Option>, + left_sort_exprs: Option, + right_sort_exprs: Option, mode: StreamJoinPartitionMode, ) -> Result { let left_schema = left.schema(); @@ -318,12 +320,12 @@ impl SymmetricHashJoinExec { } /// Get left_sort_exprs - pub fn left_sort_exprs(&self) -> Option<&[PhysicalSortExpr]> { + pub fn left_sort_exprs(&self) -> Option { self.left_sort_exprs.as_deref() } /// Get right_sort_exprs - pub fn right_sort_exprs(&self) -> Option<&[PhysicalSortExpr]> { + pub fn right_sort_exprs(&self) -> Option { self.right_sort_exprs.as_deref() } @@ -416,9 +418,11 @@ impl ExecutionPlan for SymmetricHashJoinExec { vec![ self.left_sort_exprs .as_ref() + .map(LexOrdering::iter) .map(PhysicalSortRequirement::from_sort_exprs), self.right_sort_exprs .as_ref() + .map(LexOrdering::iter) .map(PhysicalSortRequirement::from_sort_exprs), ] } @@ -670,7 +674,11 @@ fn need_to_produce_result_in_final(build_side: JoinSide, join_type: JoinType) -> if build_side == JoinSide::Left { matches!( join_type, - JoinType::Left | JoinType::LeftAnti | JoinType::Full | JoinType::LeftSemi + JoinType::Left + | JoinType::LeftAnti + | JoinType::Full + | JoinType::LeftSemi + | JoinType::LeftMark ) } else { matches!( @@ -709,6 +717,20 @@ 
where { // Store the result in a tuple let result = match (build_side, join_type) { + (JoinSide::Left, JoinType::LeftMark) => { + let build_indices = (0..prune_length) + .map(L::Native::from_usize) + .collect::>(); + let probe_indices = (0..prune_length) + .map(|idx| { + // For mark join we output a dummy index 0 to indicate the row had a match + visited_rows + .contains(&(idx + deleted_offset)) + .then_some(R::Native::from_usize(0).unwrap()) + }) + .collect(); + (build_indices, probe_indices) + } // In the case of `Left` or `Right` join, or `Full` join, get the anti indices (JoinSide::Left, JoinType::Left | JoinType::LeftAnti) | (JoinSide::Right, JoinType::Right | JoinType::RightAnti) @@ -872,6 +894,7 @@ pub(crate) fn join_with_probe_batch( JoinType::LeftAnti | JoinType::RightAnti | JoinType::LeftSemi + | JoinType::LeftMark | JoinType::RightSemi ) { Ok(None) @@ -1626,6 +1649,7 @@ mod tests { use datafusion_execution::config::SessionConfig; use datafusion_expr::Operator; use datafusion_physical_expr::expressions::{binary, col, lit, Column}; + use datafusion_physical_expr_common::sort_expr::{LexOrdering, PhysicalSortExpr}; use once_cell::sync::Lazy; use rstest::*; @@ -1707,6 +1731,7 @@ mod tests { JoinType::RightSemi, JoinType::LeftSemi, JoinType::LeftAnti, + JoinType::LeftMark, JoinType::RightAnti, JoinType::Full )] @@ -1725,7 +1750,7 @@ mod tests { let left_schema = &left_partition[0].schema(); let right_schema = &right_partition[0].schema(); - let left_sorted = vec![PhysicalSortExpr { + let left_sorted = LexOrdering::new(vec![PhysicalSortExpr { expr: binary( col("la1", left_schema)?, Operator::Plus, @@ -1733,11 +1758,11 @@ mod tests { left_schema, )?, options: SortOptions::default(), - }]; - let right_sorted = vec![PhysicalSortExpr { + }]); + let right_sorted = LexOrdering::new(vec![PhysicalSortExpr { expr: col("ra1", right_schema)?, options: SortOptions::default(), - }]; + }]); let (left, right) = create_memory_table( left_partition, right_partition, @@ -1791,6 
+1816,7 @@ mod tests { JoinType::RightSemi, JoinType::LeftSemi, JoinType::LeftAnti, + JoinType::LeftMark, JoinType::RightAnti, JoinType::Full )] @@ -1803,14 +1829,14 @@ mod tests { let left_schema = &left_partition[0].schema(); let right_schema = &right_partition[0].schema(); - let left_sorted = vec![PhysicalSortExpr { + let left_sorted = LexOrdering::new(vec![PhysicalSortExpr { expr: col("la1", left_schema)?, options: SortOptions::default(), - }]; - let right_sorted = vec![PhysicalSortExpr { + }]); + let right_sorted = LexOrdering::new(vec![PhysicalSortExpr { expr: col("ra1", right_schema)?, options: SortOptions::default(), - }]; + }]); let (left, right) = create_memory_table( left_partition, right_partition, @@ -1855,6 +1881,7 @@ mod tests { JoinType::RightSemi, JoinType::LeftSemi, JoinType::LeftAnti, + JoinType::LeftMark, JoinType::RightAnti, JoinType::Full )] @@ -1906,6 +1933,7 @@ mod tests { JoinType::RightSemi, JoinType::LeftSemi, JoinType::LeftAnti, + JoinType::LeftMark, JoinType::RightAnti, JoinType::Full )] @@ -1933,6 +1961,7 @@ mod tests { JoinType::RightSemi, JoinType::LeftSemi, JoinType::LeftAnti, + JoinType::LeftMark, JoinType::RightAnti, JoinType::Full )] @@ -1943,20 +1972,20 @@ mod tests { let (left_partition, right_partition) = get_or_create_table((11, 21), 8)?; let left_schema = &left_partition[0].schema(); let right_schema = &right_partition[0].schema(); - let left_sorted = vec![PhysicalSortExpr { + let left_sorted = LexOrdering::new(vec![PhysicalSortExpr { expr: col("la1_des", left_schema)?, options: SortOptions { descending: true, nulls_first: true, }, - }]; - let right_sorted = vec![PhysicalSortExpr { + }]); + let right_sorted = LexOrdering::new(vec![PhysicalSortExpr { expr: col("ra1_des", right_schema)?, options: SortOptions { descending: true, nulls_first: true, }, - }]; + }]); let (left, right) = create_memory_table( left_partition, right_partition, @@ -2001,20 +2030,20 @@ mod tests { let (left_partition, right_partition) = 
get_or_create_table((10, 11), 8)?; let left_schema = &left_partition[0].schema(); let right_schema = &right_partition[0].schema(); - let left_sorted = vec![PhysicalSortExpr { + let left_sorted = LexOrdering::new(vec![PhysicalSortExpr { expr: col("l_asc_null_first", left_schema)?, options: SortOptions { descending: false, nulls_first: true, }, - }]; - let right_sorted = vec![PhysicalSortExpr { + }]); + let right_sorted = LexOrdering::new(vec![PhysicalSortExpr { expr: col("r_asc_null_first", right_schema)?, options: SortOptions { descending: false, nulls_first: true, }, - }]; + }]); let (left, right) = create_memory_table( left_partition, right_partition, @@ -2059,20 +2088,20 @@ mod tests { let left_schema = &left_partition[0].schema(); let right_schema = &right_partition[0].schema(); - let left_sorted = vec![PhysicalSortExpr { + let left_sorted = LexOrdering::new(vec![PhysicalSortExpr { expr: col("l_asc_null_last", left_schema)?, options: SortOptions { descending: false, nulls_first: false, }, - }]; - let right_sorted = vec![PhysicalSortExpr { + }]); + let right_sorted = LexOrdering::new(vec![PhysicalSortExpr { expr: col("r_asc_null_last", right_schema)?, options: SortOptions { descending: false, nulls_first: false, }, - }]; + }]); let (left, right) = create_memory_table( left_partition, right_partition, @@ -2119,20 +2148,20 @@ mod tests { let left_schema = &left_partition[0].schema(); let right_schema = &right_partition[0].schema(); - let left_sorted = vec![PhysicalSortExpr { + let left_sorted = LexOrdering::new(vec![PhysicalSortExpr { expr: col("l_desc_null_first", left_schema)?, options: SortOptions { descending: true, nulls_first: true, }, - }]; - let right_sorted = vec![PhysicalSortExpr { + }]); + let right_sorted = LexOrdering::new(vec![PhysicalSortExpr { expr: col("r_desc_null_first", right_schema)?, options: SortOptions { descending: true, nulls_first: true, }, - }]; + }]); let (left, right) = create_memory_table( left_partition, right_partition, @@ -2180,15 
+2209,15 @@ mod tests { let left_schema = &left_partition[0].schema(); let right_schema = &right_partition[0].schema(); - let left_sorted = vec![PhysicalSortExpr { + let left_sorted = LexOrdering::new(vec![PhysicalSortExpr { expr: col("la1", left_schema)?, options: SortOptions::default(), - }]; + }]); - let right_sorted = vec![PhysicalSortExpr { + let right_sorted = LexOrdering::new(vec![PhysicalSortExpr { expr: col("ra1", right_schema)?, options: SortOptions::default(), - }]; + }]); let (left, right) = create_memory_table( left_partition, right_partition, @@ -2238,20 +2267,20 @@ mod tests { let left_schema = &left_partition[0].schema(); let right_schema = &right_partition[0].schema(); let left_sorted = vec![ - vec![PhysicalSortExpr { + LexOrdering::new(vec![PhysicalSortExpr { expr: col("la1", left_schema)?, options: SortOptions::default(), - }], - vec![PhysicalSortExpr { + }]), + LexOrdering::new(vec![PhysicalSortExpr { expr: col("la2", left_schema)?, options: SortOptions::default(), - }], + }]), ]; - let right_sorted = vec![PhysicalSortExpr { + let right_sorted = LexOrdering::new(vec![PhysicalSortExpr { expr: col("ra1", right_schema)?, options: SortOptions::default(), - }]; + }]); let (left, right) = create_memory_table( left_partition, @@ -2298,6 +2327,7 @@ mod tests { JoinType::RightSemi, JoinType::LeftSemi, JoinType::LeftAnti, + JoinType::LeftMark, JoinType::RightAnti, JoinType::Full )] @@ -2317,20 +2347,20 @@ mod tests { let left_schema = &left_partition[0].schema(); let right_schema = &right_partition[0].schema(); let on = vec![(col("lc1", left_schema)?, col("rc1", right_schema)?)]; - let left_sorted = vec![PhysicalSortExpr { + let left_sorted = LexOrdering::new(vec![PhysicalSortExpr { expr: col("lt1", left_schema)?, options: SortOptions { descending: false, nulls_first: true, }, - }]; - let right_sorted = vec![PhysicalSortExpr { + }]); + let right_sorted = LexOrdering::new(vec![PhysicalSortExpr { expr: col("rt1", right_schema)?, options: SortOptions { 
descending: false, nulls_first: true, }, - }]; + }]); let (left, right) = create_memory_table( left_partition, right_partition, @@ -2380,6 +2410,7 @@ mod tests { JoinType::RightSemi, JoinType::LeftSemi, JoinType::LeftAnti, + JoinType::LeftMark, JoinType::RightAnti, JoinType::Full )] @@ -2398,20 +2429,20 @@ mod tests { let left_schema = &left_partition[0].schema(); let right_schema = &right_partition[0].schema(); let on = vec![(col("lc1", left_schema)?, col("rc1", right_schema)?)]; - let left_sorted = vec![PhysicalSortExpr { + let left_sorted = LexOrdering::new(vec![PhysicalSortExpr { expr: col("li1", left_schema)?, options: SortOptions { descending: false, nulls_first: true, }, - }]; - let right_sorted = vec![PhysicalSortExpr { + }]); + let right_sorted = LexOrdering::new(vec![PhysicalSortExpr { expr: col("ri1", right_schema)?, options: SortOptions { descending: false, nulls_first: true, }, - }]; + }]); let (left, right) = create_memory_table( left_partition, right_partition, @@ -2454,6 +2485,7 @@ mod tests { JoinType::RightSemi, JoinType::LeftSemi, JoinType::LeftAnti, + JoinType::LeftMark, JoinType::RightAnti, JoinType::Full )] @@ -2472,14 +2504,14 @@ mod tests { let left_schema = &left_partition[0].schema(); let right_schema = &right_partition[0].schema(); - let left_sorted = vec![PhysicalSortExpr { + let left_sorted = LexOrdering::new(vec![PhysicalSortExpr { expr: col("l_float", left_schema)?, options: SortOptions::default(), - }]; - let right_sorted = vec![PhysicalSortExpr { + }]); + let right_sorted = LexOrdering::new(vec![PhysicalSortExpr { expr: col("r_float", right_schema)?, options: SortOptions::default(), - }]; + }]); let (left, right) = create_memory_table( left_partition, right_partition, diff --git a/datafusion/physical-plan/src/joins/test_utils.rs b/datafusion/physical-plan/src/joins/test_utils.rs index 090d60f0bac3..421fd0da808c 100644 --- a/datafusion/physical-plan/src/joins/test_utils.rs +++ b/datafusion/physical-plan/src/joins/test_utils.rs @@ 
-101,8 +101,10 @@ pub async fn partitioned_sym_join_with_filter( filter, join_type, null_equals_null, - left.output_ordering().map(|p| p.to_vec()), - right.output_ordering().map(|p| p.to_vec()), + left.output_ordering().map(|p| LexOrdering::new(p.to_vec())), + right + .output_ordering() + .map(|p| LexOrdering::new(p.to_vec())), StreamJoinPartitionMode::Partitioned, )?; diff --git a/datafusion/physical-plan/src/joins/utils.rs b/datafusion/physical-plan/src/joins/utils.rs index 090cf9aa628a..d3fa37c2ac80 100644 --- a/datafusion/physical-plan/src/joins/utils.rs +++ b/datafusion/physical-plan/src/joins/utils.rs @@ -20,6 +20,7 @@ use std::collections::HashSet; use std::fmt::{self, Debug}; use std::future::Future; +use std::iter::once; use std::ops::{IndexMut, Range}; use std::sync::Arc; use std::task::{Context, Poll}; @@ -448,10 +449,10 @@ pub fn adjust_right_output_partitioning( /// the left column (zeroth index in the tuple) inside `right_ordering`. fn replace_on_columns_of_right_ordering( on_columns: &[(PhysicalExprRef, PhysicalExprRef)], - right_ordering: &mut [PhysicalSortExpr], + right_ordering: &mut LexOrdering, ) -> Result<()> { for (left_col, right_col) in on_columns { - for item in right_ordering.iter_mut() { + for item in right_ordering.inner.iter_mut() { let new_expr = Arc::clone(&item.expr) .transform(|e| { if e.eq(right_col) { @@ -471,7 +472,7 @@ fn offset_ordering( ordering: LexOrderingRef, join_type: &JoinType, offset: usize, -) -> Vec { +) -> LexOrdering { match join_type { // In the case below, right ordering should be offsetted with the left // side length, since we append the right table to the left table. 
@@ -482,7 +483,7 @@ fn offset_ordering( options: sort_expr.options, }) .collect(), - _ => ordering.to_vec(), + _ => LexOrdering::from_ref(ordering), } } @@ -502,15 +503,16 @@ pub fn calculate_join_output_ordering( if join_type == JoinType::Inner && probe_side == Some(JoinSide::Left) { replace_on_columns_of_right_ordering( on_columns, - &mut right_ordering.to_vec(), + &mut LexOrdering::from_ref(right_ordering), ) .ok()?; merge_vectors( left_ordering, - &offset_ordering(right_ordering, &join_type, left_columns_len), + offset_ordering(right_ordering, &join_type, left_columns_len) + .as_ref(), ) } else { - left_ordering.to_vec() + LexOrdering::from_ref(left_ordering) } } [false, true] => { @@ -518,11 +520,12 @@ pub fn calculate_join_output_ordering( if join_type == JoinType::Inner && probe_side == Some(JoinSide::Right) { replace_on_columns_of_right_ordering( on_columns, - &mut right_ordering.to_vec(), + &mut LexOrdering::from_ref(right_ordering), ) .ok()?; merge_vectors( - &offset_ordering(right_ordering, &join_type, left_columns_len), + offset_ordering(right_ordering, &join_type, left_columns_len) + .as_ref(), left_ordering, ) } else { @@ -619,6 +622,7 @@ fn output_join_field(old_field: &Field, join_type: &JoinType, is_left: bool) -> JoinType::RightSemi => false, // doesn't introduce nulls JoinType::LeftAnti => false, // doesn't introduce nulls (or can it??) JoinType::RightAnti => false, // doesn't introduce nulls (or can it??) 
+ JoinType::LeftMark => false, }; if force_nullable { @@ -635,44 +639,10 @@ pub fn build_join_schema( right: &Schema, join_type: &JoinType, ) -> (Schema, Vec) { - let (fields, column_indices): (SchemaBuilder, Vec) = match join_type { - JoinType::Inner | JoinType::Left | JoinType::Full | JoinType::Right => { - let left_fields = left - .fields() - .iter() - .map(|f| output_join_field(f, join_type, true)) - .enumerate() - .map(|(index, f)| { - ( - f, - ColumnIndex { - index, - side: JoinSide::Left, - }, - ) - }); - let right_fields = right - .fields() - .iter() - .map(|f| output_join_field(f, join_type, false)) - .enumerate() - .map(|(index, f)| { - ( - f, - ColumnIndex { - index, - side: JoinSide::Right, - }, - ) - }); - - // left then right - left_fields.chain(right_fields).unzip() - } - JoinType::LeftSemi | JoinType::LeftAnti => left - .fields() + let left_fields = || { + left.fields() .iter() - .cloned() + .map(|f| output_join_field(f, join_type, true)) .enumerate() .map(|(index, f)| { ( @@ -683,11 +653,13 @@ pub fn build_join_schema( }, ) }) - .unzip(), - JoinType::RightSemi | JoinType::RightAnti => right + }; + + let right_fields = || { + right .fields() .iter() - .cloned() + .map(|f| output_join_field(f, join_type, false)) .enumerate() .map(|(index, f)| { ( @@ -698,7 +670,25 @@ pub fn build_join_schema( }, ) }) - .unzip(), + }; + + let (fields, column_indices): (SchemaBuilder, Vec) = match join_type { + JoinType::Inner | JoinType::Left | JoinType::Full | JoinType::Right => { + // left then right + left_fields().chain(right_fields()).unzip() + } + JoinType::LeftSemi | JoinType::LeftAnti => left_fields().unzip(), + JoinType::LeftMark => { + let right_field = once(( + Field::new("mark", arrow_schema::DataType::Boolean, false), + ColumnIndex { + index: 0, + side: JoinSide::None, + }, + )); + left_fields().chain(right_field).unzip() + } + JoinType::RightSemi | JoinType::RightAnti => right_fields().unzip(), }; let metadata = left @@ -902,6 +892,16 @@ fn 
estimate_join_cardinality( column_statistics: outer_stats.column_statistics, }) } + + JoinType::LeftMark => { + let num_rows = *left_stats.num_rows.get_value()?; + let mut column_statistics = left_stats.column_statistics; + column_statistics.push(ColumnStatistics::new_unknown()); + Some(PartialJoinStatistics { + num_rows, + column_statistics, + }) + } } } @@ -1153,7 +1153,11 @@ impl OnceFut { pub(crate) fn need_produce_result_in_final(join_type: JoinType) -> bool { matches!( join_type, - JoinType::Left | JoinType::LeftAnti | JoinType::LeftSemi | JoinType::Full + JoinType::Left + | JoinType::LeftAnti + | JoinType::LeftSemi + | JoinType::LeftMark + | JoinType::Full ) } @@ -1171,6 +1175,13 @@ pub(crate) fn get_final_indices_from_bit_map( join_type: JoinType, ) -> (UInt64Array, UInt32Array) { let left_size = left_bit_map.len(); + if join_type == JoinType::LeftMark { + let left_indices = (0..left_size as u64).collect::(); + let right_indices = (0..left_size) + .map(|idx| left_bit_map.get_bit(idx).then_some(0)) + .collect::(); + return (left_indices, right_indices); + } let left_indices = if join_type == JoinType::LeftSemi { (0..left_size) .filter_map(|idx| (left_bit_map.get_bit(idx)).then_some(idx as u64)) @@ -1254,7 +1265,10 @@ pub(crate) fn build_batch_from_indices( let mut columns: Vec> = Vec::with_capacity(schema.fields().len()); for column_index in column_indices { - let array = if column_index.side == build_side { + let array = if column_index.side == JoinSide::None { + // LeftMark join: the mark column is true if the probe index is not null, otherwise false + Arc::new(compute::is_not_null(probe_indices)?) + } else if column_index.side == build_side { let array = build_input_buffer.column(column_index.index); if array.is_empty() || build_indices.null_count() == build_indices.len() { // Outer join would generate a null index when finding no match at our side. 
@@ -1323,7 +1337,7 @@ pub(crate) fn adjust_indices_by_join_type( // the left_indices will not be used later for the `right anti` join Ok((left_indices, right_indices)) } - JoinType::LeftSemi | JoinType::LeftAnti => { + JoinType::LeftSemi | JoinType::LeftAnti | JoinType::LeftMark => { // matched or unmatched left row will be produced in the end of loop // When visit the right batch, we can output the matched left row and don't need to wait the end of loop Ok(( @@ -1646,7 +1660,7 @@ pub(crate) fn symmetric_join_output_partitioning( let left_partitioning = left.output_partitioning(); let right_partitioning = right.output_partitioning(); match join_type { - JoinType::Left | JoinType::LeftSemi | JoinType::LeftAnti => { + JoinType::Left | JoinType::LeftSemi | JoinType::LeftAnti | JoinType::LeftMark => { left_partitioning.clone() } JoinType::RightSemi | JoinType::RightAnti => right_partitioning.clone(), @@ -1671,11 +1685,13 @@ pub(crate) fn asymmetric_join_output_partitioning( left.schema().fields().len(), ), JoinType::RightSemi | JoinType::RightAnti => right.output_partitioning().clone(), - JoinType::Left | JoinType::LeftSemi | JoinType::LeftAnti | JoinType::Full => { - Partitioning::UnknownPartitioning( - right.output_partitioning().partition_count(), - ) - } + JoinType::Left + | JoinType::LeftSemi + | JoinType::LeftAnti + | JoinType::Full + | JoinType::LeftMark => Partitioning::UnknownPartitioning( + right.output_partitioning().partition_count(), + ), } } @@ -2586,7 +2602,7 @@ mod tests { #[test] fn test_calculate_join_output_ordering() -> Result<()> { let options = SortOptions::default(); - let left_ordering = vec![ + let left_ordering = LexOrdering::new(vec![ PhysicalSortExpr { expr: Arc::new(Column::new("a", 0)), options, @@ -2599,8 +2615,8 @@ mod tests { expr: Arc::new(Column::new("d", 3)), options, }, - ]; - let right_ordering = vec![ + ]); + let right_ordering = LexOrdering::new(vec![ PhysicalSortExpr { expr: Arc::new(Column::new("z", 2)), options, @@ -2609,7 
+2625,7 @@ mod tests { expr: Arc::new(Column::new("y", 1)), options, }, - ]; + ]); let join_type = JoinType::Inner; let on_columns = [( Arc::new(Column::new("b", 1)) as _, @@ -2620,7 +2636,7 @@ mod tests { let probe_sides = [Some(JoinSide::Left), Some(JoinSide::Right)]; let expected = [ - Some(vec![ + Some(LexOrdering::new(vec![ PhysicalSortExpr { expr: Arc::new(Column::new("a", 0)), options, @@ -2641,8 +2657,8 @@ mod tests { expr: Arc::new(Column::new("y", 6)), options, }, - ]), - Some(vec![ + ])), + Some(LexOrdering::new(vec![ PhysicalSortExpr { expr: Arc::new(Column::new("z", 7)), options, @@ -2663,7 +2679,7 @@ mod tests { expr: Arc::new(Column::new("d", 3)), options, }, - ]), + ])), ]; for (i, (maintains_input_order, probe_side)) in @@ -2671,8 +2687,8 @@ mod tests { { assert_eq!( calculate_join_output_ordering( - &left_ordering, - &right_ordering, + left_ordering.as_ref(), + right_ordering.as_ref(), join_type, &on_columns, left_columns_len, diff --git a/datafusion/physical-plan/src/limit.rs b/datafusion/physical-plan/src/limit.rs index 1fe550a93056..ab1e6cb37bc8 100644 --- a/datafusion/physical-plan/src/limit.rs +++ b/datafusion/physical-plan/src/limit.rs @@ -39,7 +39,7 @@ use futures::stream::{Stream, StreamExt}; use log::trace; /// Limit execution plan -#[derive(Debug)] +#[derive(Debug, Clone)] pub struct GlobalLimitExec { /// Input execution plan input: Arc, diff --git a/datafusion/physical-plan/src/memory.rs b/datafusion/physical-plan/src/memory.rs index dd4868d1bfcc..c9ada345afc7 100644 --- a/datafusion/physical-plan/src/memory.rs +++ b/datafusion/physical-plan/src/memory.rs @@ -22,7 +22,6 @@ use std::fmt; use std::sync::Arc; use std::task::{Context, Poll}; -use super::expressions::PhysicalSortExpr; use super::{ common, DisplayAs, DisplayFormatType, ExecutionMode, ExecutionPlan, Partitioning, PlanProperties, RecordBatchStream, SendableRecordBatchStream, Statistics, @@ -41,6 +40,7 @@ use datafusion_physical_expr::{EquivalenceProperties, LexOrdering}; use 
futures::Stream; /// Execution plan for reading in-memory batches of data +#[derive(Clone)] pub struct MemoryExec { /// The partitions to query partitions: Vec>, @@ -79,10 +79,7 @@ impl DisplayAs for MemoryExec { .sort_information .first() .map(|output_ordering| { - format!( - ", output_ordering={}", - PhysicalSortExpr::format_list(output_ordering) - ) + format!(", output_ordering={}", output_ordering) }) .unwrap_or_default(); @@ -216,7 +213,7 @@ impl MemoryExec { let fields = self.schema.fields(); let ambiguous_column = sort_information .iter() - .flatten() + .flat_map(|ordering| ordering.inner.clone()) .flat_map(|expr| collect_columns(&expr.expr)) .find(|col| { fields @@ -365,6 +362,7 @@ mod tests { use arrow_schema::{DataType, Field, Schema, SortOptions}; use datafusion_physical_expr::expressions::col; use datafusion_physical_expr::PhysicalSortExpr; + use datafusion_physical_expr_common::sort_expr::LexOrdering; #[test] fn test_memory_order_eq() -> datafusion_common::Result<()> { @@ -373,7 +371,7 @@ mod tests { Field::new("b", DataType::Int64, false), Field::new("c", DataType::Int64, false), ])); - let sort1 = vec![ + let sort1 = LexOrdering::new(vec![ PhysicalSortExpr { expr: col("a", &schema)?, options: SortOptions::default(), @@ -382,12 +380,12 @@ mod tests { expr: col("b", &schema)?, options: SortOptions::default(), }, - ]; - let sort2 = vec![PhysicalSortExpr { + ]); + let sort2 = LexOrdering::new(vec![PhysicalSortExpr { expr: col("c", &schema)?, options: SortOptions::default(), - }]; - let mut expected_output_order = vec![]; + }]); + let mut expected_output_order = LexOrdering::default(); expected_output_order.extend(sort1.clone()); expected_output_order.extend(sort2.clone()); @@ -396,8 +394,8 @@ mod tests { .try_with_sort_information(sort_information)?; assert_eq!( - mem_exec.properties().output_ordering().unwrap(), - expected_output_order + mem_exec.properties().output_ordering().unwrap().to_vec(), + expected_output_order.inner ); let eq_properties = 
mem_exec.properties().equivalence_properties(); assert!(eq_properties.oeq_class().contains(&sort1)); diff --git a/datafusion/physical-plan/src/placeholder_row.rs b/datafusion/physical-plan/src/placeholder_row.rs index 5d8ca7e76935..f9437f46f8a6 100644 --- a/datafusion/physical-plan/src/placeholder_row.rs +++ b/datafusion/physical-plan/src/placeholder_row.rs @@ -37,7 +37,7 @@ use datafusion_physical_expr::EquivalenceProperties; use log::trace; /// Execution plan for empty relation with produce_one_row=true -#[derive(Debug)] +#[derive(Debug, Clone)] pub struct PlaceholderRowExec { /// The schema for the produced row schema: SchemaRef, diff --git a/datafusion/physical-plan/src/recursive_query.rs b/datafusion/physical-plan/src/recursive_query.rs index e9ea9d4f5032..cbf22a4b392f 100644 --- a/datafusion/physical-plan/src/recursive_query.rs +++ b/datafusion/physical-plan/src/recursive_query.rs @@ -53,7 +53,7 @@ use futures::{ready, Stream, StreamExt}; /// Note that there won't be any limit or checks applied to detect /// an infinite recursion, so it is up to the planner to ensure that /// it won't happen. 
-#[derive(Debug)] +#[derive(Debug, Clone)] pub struct RecursiveQueryExec { /// Name of the query handler name: String, diff --git a/datafusion/physical-plan/src/repartition/mod.rs b/datafusion/physical-plan/src/repartition/mod.rs index 601c1e873152..bc65b251561b 100644 --- a/datafusion/physical-plan/src/repartition/mod.rs +++ b/datafusion/physical-plan/src/repartition/mod.rs @@ -47,9 +47,10 @@ use datafusion_common::{not_impl_err, DataFusionError, Result}; use datafusion_common_runtime::SpawnedTask; use datafusion_execution::memory_pool::MemoryConsumer; use datafusion_execution::TaskContext; -use datafusion_physical_expr::{EquivalenceProperties, PhysicalExpr, PhysicalSortExpr}; +use datafusion_physical_expr::{EquivalenceProperties, PhysicalExpr}; use crate::execution_plan::CardinalityEffect; +use datafusion_physical_expr_common::sort_expr::{LexOrdering, PhysicalSortExpr}; use futures::stream::Stream; use futures::{FutureExt, StreamExt, TryStreamExt}; use hashbrown::HashMap; @@ -398,7 +399,7 @@ impl BatchPartitioner { /// Paper](https://w6113.github.io/files/papers/volcanoparallelism-89.pdf) /// which uses the term "Exchange" for the concept of repartitioning /// data across threads. 
-#[derive(Debug)] +#[derive(Debug, Clone)] pub struct RepartitionExec { /// Input execution plan input: Arc, @@ -502,11 +503,7 @@ impl DisplayAs for RepartitionExec { } if let Some(sort_exprs) = self.sort_exprs() { - write!( - f, - ", sort_exprs={}", - PhysicalSortExpr::format_list(sort_exprs) - )?; + write!(f, ", sort_exprs={}", LexOrdering::from_ref(sort_exprs))?; } Ok(()) } @@ -1561,10 +1558,10 @@ mod tests { mod test { use arrow_schema::{DataType, Field, Schema, SortOptions}; - use datafusion_physical_expr::expressions::col; - use crate::memory::MemoryExec; use crate::union::UnionExec; + use datafusion_physical_expr::expressions::col; + use datafusion_physical_expr_common::sort_expr::{LexOrdering, PhysicalSortExpr}; use super::*; @@ -1659,12 +1656,12 @@ mod test { Arc::new(Schema::new(vec![Field::new("c0", DataType::UInt32, false)])) } - fn sort_exprs(schema: &Schema) -> Vec { + fn sort_exprs(schema: &Schema) -> LexOrdering { let options = SortOptions::default(); - vec![PhysicalSortExpr { + LexOrdering::new(vec![PhysicalSortExpr { expr: col("c0", schema).unwrap(), options, - }] + }]) } fn memory_exec(schema: &SchemaRef) -> Arc { @@ -1673,7 +1670,7 @@ mod test { fn sorted_memory_exec( schema: &SchemaRef, - sort_exprs: Vec, + sort_exprs: LexOrdering, ) -> Arc { Arc::new( MemoryExec::try_new(&[vec![]], Arc::clone(schema), None) diff --git a/datafusion/physical-plan/src/sorts/cursor.rs b/datafusion/physical-plan/src/sorts/cursor.rs index df90c97faf68..133d736c1467 100644 --- a/datafusion/physical-plan/src/sorts/cursor.rs +++ b/datafusion/physical-plan/src/sorts/cursor.rs @@ -38,6 +38,10 @@ pub trait CursorValues { /// Returns true if `l[l_idx] == r[r_idx]` fn eq(l: &Self, l_idx: usize, r: &Self, r_idx: usize) -> bool; + /// Returns true if `row[idx] == row[idx - 1]` + /// Given `idx` should be greater than 0 + fn eq_to_previous(cursor: &Self, idx: usize) -> bool; + /// Returns comparison of `l[l_idx]` and `r[r_idx]` fn compare(l: &Self, l_idx: usize, r: &Self, 
r_idx: usize) -> Ordering; } @@ -95,6 +99,16 @@ impl Cursor { self.offset += 1; t } + + pub fn is_eq_to_prev_one(&self, prev_cursor: Option<&Cursor>) -> bool { + if self.offset > 0 { + self.is_eq_to_prev_row() + } else if let Some(prev_cursor) = prev_cursor { + self.is_eq_to_prev_row_in_prev_batch(prev_cursor) + } else { + false + } + } } impl PartialEq for Cursor { @@ -103,6 +117,22 @@ impl PartialEq for Cursor { } } +impl Cursor { + fn is_eq_to_prev_row(&self) -> bool { + T::eq_to_previous(&self.values, self.offset) + } + + fn is_eq_to_prev_row_in_prev_batch(&self, other: &Self) -> bool { + assert_eq!(self.offset, 0); + T::eq( + &self.values, + self.offset, + &other.values, + other.values.len() - 1, + ) + } +} + impl Eq for Cursor {} impl PartialOrd for Cursor { @@ -156,6 +186,11 @@ impl CursorValues for RowValues { l.rows.row(l_idx) == r.rows.row(r_idx) } + fn eq_to_previous(cursor: &Self, idx: usize) -> bool { + assert!(idx > 0); + cursor.rows.row(idx) == cursor.rows.row(idx - 1) + } + fn compare(l: &Self, l_idx: usize, r: &Self, r_idx: usize) -> Ordering { l.rows.row(l_idx).cmp(&r.rows.row(r_idx)) } @@ -188,6 +223,11 @@ impl CursorValues for PrimitiveValues { l.0[l_idx].is_eq(r.0[r_idx]) } + fn eq_to_previous(cursor: &Self, idx: usize) -> bool { + assert!(idx > 0); + cursor.0[idx].is_eq(cursor.0[idx - 1]) + } + fn compare(l: &Self, l_idx: usize, r: &Self, r_idx: usize) -> Ordering { l.0[l_idx].compare(r.0[r_idx]) } @@ -219,6 +259,11 @@ impl CursorValues for ByteArrayValues { l.value(l_idx) == r.value(r_idx) } + fn eq_to_previous(cursor: &Self, idx: usize) -> bool { + assert!(idx > 0); + cursor.value(idx) == cursor.value(idx - 1) + } + fn compare(l: &Self, l_idx: usize, r: &Self, r_idx: usize) -> Ordering { l.value(l_idx).cmp(r.value(r_idx)) } @@ -284,6 +329,15 @@ impl CursorValues for ArrayValues { } } + fn eq_to_previous(cursor: &Self, idx: usize) -> bool { + assert!(idx > 0); + match (cursor.is_null(idx), cursor.is_null(idx - 1)) { + (true, true) => true, + 
(false, false) => T::eq(&cursor.values, idx, &cursor.values, idx - 1), + _ => false, + } + } + fn compare(l: &Self, l_idx: usize, r: &Self, r_idx: usize) -> Ordering { match (l.is_null(l_idx), r.is_null(r_idx)) { (true, true) => Ordering::Equal, diff --git a/datafusion/physical-plan/src/sorts/merge.rs b/datafusion/physical-plan/src/sorts/merge.rs index e0644e3d99e5..458c1c29c0cf 100644 --- a/datafusion/physical-plan/src/sorts/merge.rs +++ b/datafusion/physical-plan/src/sorts/merge.rs @@ -97,6 +97,40 @@ pub(crate) struct SortPreservingMergeStream { /// Cursors for each input partition. `None` means the input is exhausted cursors: Vec>>, + /// Configuration parameter to enable round-robin selection of tied winners of loser tree. + /// + /// To address the issue of unbalanced polling between partitions due to tie-breakers being based + /// on partition index, especially in cases of low cardinality, we are making changes to the winner + /// selection mechanism. Previously, partitions with smaller indices were consistently chosen as the winners, + /// leading to an uneven distribution of polling. This caused upstream operator buffers for the other partitions + /// to grow excessively, as they continued receiving data without consuming it. + /// + /// For example, an upstream operator like a repartition execution would keep sending data to certain partitions, + /// but those partitions wouldn't consume the data if they weren't selected as winners. This resulted in inefficient buffer usage. + /// + /// To resolve this, we are modifying the tie-breaking logic. Instead of always choosing the partition with the smallest index, + /// we now select the partition that has the fewest poll counts for the same value. + /// This ensures that multiple partitions with the same value are chosen equally, distributing the polling load in a round-robin fashion. + /// This approach balances the workload more effectively across partitions and avoids excessive buffer growth. 
+ enable_round_robin_tie_breaker: bool, + + /// Flag indicating whether we are in the mode of round-robin + /// tie breaker for the loser tree winners. + round_robin_tie_breaker_mode: bool, + + /// Total number of polls returning the same value, as per partition. + /// We select the one that has less poll counts for tie-breaker in loser tree. + num_of_polled_with_same_value: Vec, + + /// To keep track of reset counts + poll_reset_epochs: Vec, + + /// Current reset count + current_reset_epoch: usize, + + /// Stores the previous value of each partitions for tracking the poll counts on the same value. + prev_cursors: Vec>>, + /// Optional number of rows to fetch fetch: Option, @@ -118,6 +152,7 @@ impl SortPreservingMergeStream { batch_size: usize, fetch: Option, reservation: MemoryReservation, + enable_round_robin_tie_breaker: bool, ) -> Self { let stream_count = streams.partitions(); @@ -127,12 +162,18 @@ impl SortPreservingMergeStream { metrics, aborted: false, cursors: (0..stream_count).map(|_| None).collect(), + prev_cursors: (0..stream_count).map(|_| None).collect(), + round_robin_tie_breaker_mode: false, + num_of_polled_with_same_value: vec![0; stream_count], + current_reset_epoch: 0, + poll_reset_epochs: vec![0; stream_count], loser_tree: vec![], loser_tree_adjusted: false, batch_size, fetch, produced: 0, uninitiated_partitions: (0..stream_count).collect(), + enable_round_robin_tie_breaker, } } @@ -218,7 +259,7 @@ impl SortPreservingMergeStream { } let stream_idx = self.loser_tree[0]; - if self.advance(stream_idx) { + if self.advance_cursors(stream_idx) { self.loser_tree_adjusted = false; self.in_progress.push_row(stream_idx); @@ -236,27 +277,53 @@ impl SortPreservingMergeStream { } } + /// For the given partition, updates the poll count. If the current value is the same + /// of the previous value, it increases the count by 1; otherwise, it is reset as 0. 
+ fn update_poll_count_on_the_same_value(&mut self, partition_idx: usize) { + let cursor = &mut self.cursors[partition_idx]; + + // Check if the current partition's poll count is logically "reset" + if self.poll_reset_epochs[partition_idx] != self.current_reset_epoch { + self.poll_reset_epochs[partition_idx] = self.current_reset_epoch; + self.num_of_polled_with_same_value[partition_idx] = 0; + } + + if let Some(c) = cursor.as_mut() { + // Compare with the last row in the previous batch + let prev_cursor = &self.prev_cursors[partition_idx]; + if c.is_eq_to_prev_one(prev_cursor.as_ref()) { + self.num_of_polled_with_same_value[partition_idx] += 1; + } else { + self.num_of_polled_with_same_value[partition_idx] = 0; + } + } + } + fn fetch_reached(&mut self) -> bool { self.fetch .map(|fetch| self.produced + self.in_progress.len() >= fetch) .unwrap_or(false) } - fn advance(&mut self, stream_idx: usize) -> bool { - let slot = &mut self.cursors[stream_idx]; - match slot.as_mut() { - Some(c) => { - c.advance(); - if c.is_finished() { - *slot = None; - } - true + /// Advances the actual cursor. If it reaches its end, update the + /// previous cursor with it. + /// + /// If the given partition is not exhausted, the function returns `true`. + fn advance_cursors(&mut self, stream_idx: usize) -> bool { + if let Some(cursor) = &mut self.cursors[stream_idx] { + let _ = cursor.advance(); + if cursor.is_finished() { + // Take the current cursor, leaving `None` in its place + self.prev_cursors[stream_idx] = self.cursors[stream_idx].take(); } - None => false, + true + } else { + false } } - /// Returns `true` if the cursor at index `a` is greater than at index `b` + /// Returns `true` if the cursor at index `a` is greater than at index `b`. + /// In an equality case, it compares the partition indices given. 
#[inline] fn is_gt(&self, a: usize, b: usize) -> bool { match (&self.cursors[a], &self.cursors[b]) { @@ -266,6 +333,19 @@ impl SortPreservingMergeStream { } } + #[inline] + fn is_poll_count_gt(&self, a: usize, b: usize) -> bool { + let poll_a = self.num_of_polled_with_same_value[a]; + let poll_b = self.num_of_polled_with_same_value[b]; + poll_a.cmp(&poll_b).then_with(|| a.cmp(&b)).is_gt() + } + + #[inline] + fn update_winner(&mut self, cmp_node: usize, winner: &mut usize, challenger: usize) { + self.loser_tree[cmp_node] = *winner; + *winner = challenger; + } + /// Find the leaf node index in the loser tree for the given cursor index /// /// Note that this is not necessarily a leaf node in the tree, but it can @@ -327,16 +407,101 @@ impl SortPreservingMergeStream { self.loser_tree_adjusted = true; } - /// Attempts to update the loser tree, following winner replacement, if possible + /// Resets the poll count by incrementing the reset epoch. + fn reset_poll_counts(&mut self) { + self.current_reset_epoch += 1; + } + + /// Handles tie-breaking logic during the adjustment of the loser tree. + /// + /// When comparing elements from multiple partitions in the `update_loser_tree` process, a tie can occur + /// between the current winner and a challenger. This function is invoked when such a tie needs to be + /// resolved according to the round-robin tie-breaker mode. + /// + /// If round-robin tie-breaking is not active, it is enabled, and the poll counts for all elements are reset. + /// The function then compares the poll counts of the current winner and the challenger: + /// - If the winner remains at the top after the final comparison, it increments the winner's poll count. + /// - If the challenger has a lower poll count than the current winner, the challenger becomes the new winner. + /// - If the poll counts are equal but the challenger's index is smaller, the challenger is preferred. 
+ /// + /// # Parameters + /// - `cmp_node`: The index of the comparison node in the loser tree where the tie-breaking is happening. + /// - `winner`: A mutable reference to the current winner, which may be updated based on the tie-breaking result. + /// - `challenger`: The index of the challenger being compared against the winner. + /// + /// This function ensures fair selection among elements with equal values when tie-breaking mode is enabled, + /// aiming to balance the polling across different partitions. + #[inline] + fn handle_tie(&mut self, cmp_node: usize, winner: &mut usize, challenger: usize) { + if !self.round_robin_tie_breaker_mode { + self.round_robin_tie_breaker_mode = true; + // Reset poll count for tie-breaker + self.reset_poll_counts(); + } + // Update poll count if the winner survives in the final match + if *winner == self.loser_tree[0] { + self.update_poll_count_on_the_same_value(*winner); + if self.is_poll_count_gt(*winner, challenger) { + self.update_winner(cmp_node, winner, challenger); + } + } else if challenger < *winner { + // If the winner doesn’t survive in the final match, it indicates that the original winner + // has moved up in value, so the challenger now becomes the new winner. + // This also means that we’re in a new round of the tie breaker, + // and the polls count is outdated (though not yet cleaned up). + // + // By the time we reach this code, both the new winner and the current challenger + // have the same value, and neither has an updated polls count. + // Therefore, we simply select the one with the smaller index. + self.update_winner(cmp_node, winner, challenger); + } + } + + /// Updates the loser tree to reflect the new winner after the previous winner is consumed. + /// This function adjusts the tree by comparing the current winner with challengers from + /// other partitions. 
+ /// + /// If `enable_round_robin_tie_breaker` is true and a tie occurs at the final level, the + /// tie-breaker logic will be applied to ensure fair selection among equal elements. fn update_loser_tree(&mut self) { + // Start with the current winner let mut winner = self.loser_tree[0]; - // Replace overall winner by walking tree of losers + + // Find the leaf node index of the winner in the loser tree. let mut cmp_node = self.lt_leaf_node_index(winner); + + // Traverse up the tree to adjust comparisons until reaching the root. while cmp_node != 0 { let challenger = self.loser_tree[cmp_node]; - if self.is_gt(winner, challenger) { - self.loser_tree[cmp_node] = winner; - winner = challenger; + // If round-robin tie-breaker is enabled and we're at the final comparison (cmp_node == 1) + if self.enable_round_robin_tie_breaker && cmp_node == 1 { + match (&self.cursors[winner], &self.cursors[challenger]) { + (Some(ac), Some(bc)) => { + let ord = ac.cmp(bc); + if ord.is_eq() { + self.handle_tie(cmp_node, &mut winner, challenger); + } else { + // Ends of tie breaker + self.round_robin_tie_breaker_mode = false; + if ord.is_gt() { + self.update_winner(cmp_node, &mut winner, challenger); + } + } + } + (None, _) => { + // Challenger wins, update winner + // Ends of tie breaker + self.round_robin_tie_breaker_mode = false; + self.update_winner(cmp_node, &mut winner, challenger); + } + (_, None) => { + // Winner wins again + // Ends of tie breaker + self.round_robin_tie_breaker_mode = false; + } + } + } else if self.is_gt(winner, challenger) { + self.update_winner(cmp_node, &mut winner, challenger); } cmp_node = self.lt_parent_node_index(cmp_node); } diff --git a/datafusion/physical-plan/src/sorts/partial_sort.rs b/datafusion/physical-plan/src/sorts/partial_sort.rs index 649c05d52e8b..8f853464c9bd 100644 --- a/datafusion/physical-plan/src/sorts/partial_sort.rs +++ b/datafusion/physical-plan/src/sorts/partial_sort.rs @@ -57,7 +57,6 @@ use std::pin::Pin; use std::sync::Arc; use 
std::task::{Context, Poll}; -use crate::expressions::PhysicalSortExpr; use crate::metrics::{BaselineMetrics, ExecutionPlanMetricsSet, MetricsSet}; use crate::sorts::sort::sort_batch; use crate::{ @@ -73,6 +72,7 @@ use datafusion_common::Result; use datafusion_execution::{RecordBatchStream, TaskContext}; use datafusion_physical_expr::LexOrdering; +use datafusion_physical_expr_common::sort_expr::LexOrderingRef; use futures::{ready, Stream, StreamExt}; use log::trace; @@ -82,7 +82,7 @@ pub struct PartialSortExec { /// Input schema pub(crate) input: Arc, /// Sort expressions - expr: Vec, + expr: LexOrdering, /// Length of continuous matching columns of input that satisfy /// the required ordering for the sort common_prefix_length: usize, @@ -100,7 +100,7 @@ pub struct PartialSortExec { impl PartialSortExec { /// Create a new partial sort execution plan pub fn new( - expr: Vec, + expr: LexOrdering, input: Arc, common_prefix_length: usize, ) -> Self { @@ -159,8 +159,8 @@ impl PartialSortExec { } /// Sort expressions - pub fn expr(&self) -> &[PhysicalSortExpr] { - &self.expr + pub fn expr(&self) -> LexOrderingRef { + self.expr.as_ref() } /// If `Some(fetch)`, limits output to only the first "fetch" items @@ -212,13 +212,12 @@ impl DisplayAs for PartialSortExec { ) -> std::fmt::Result { match t { DisplayFormatType::Default | DisplayFormatType::Verbose => { - let expr = PhysicalSortExpr::format_list(&self.expr); let common_prefix_length = self.common_prefix_length; match self.fetch { Some(fetch) => { - write!(f, "PartialSortExec: TopK(fetch={fetch}), expr=[{expr}], common_prefix_length=[{common_prefix_length}]", ) + write!(f, "PartialSortExec: TopK(fetch={fetch}), expr=[{}], common_prefix_length=[{common_prefix_length}]", self.expr) } - None => write!(f, "PartialSortExec: expr=[{expr}], common_prefix_length=[{common_prefix_length}]"), + None => write!(f, "PartialSortExec: expr=[{}], common_prefix_length=[{common_prefix_length}]", self.expr), } } } @@ -315,7 +314,7 @@ struct 
PartialSortStream { /// The input plan input: SendableRecordBatchStream, /// Sort expressions - expr: Vec, + expr: LexOrdering, /// Length of prefix common to input ordering and required ordering of plan /// should be more than 0 otherwise PartialSort is not applicable common_prefix_length: usize, @@ -394,7 +393,7 @@ impl PartialSortStream { fn sort_in_mem_batches(self: &mut Pin<&mut Self>) -> Result { let input_batch = concat_batches(&self.schema(), &self.in_mem_batches)?; self.in_mem_batches.clear(); - let result = sort_batch(&input_batch, &self.expr, self.fetch)?; + let result = sort_batch(&input_batch, self.expr.as_ref(), self.fetch)?; if let Some(remaining_fetch) = self.fetch { // remaining_fetch - result.num_rows() is always be >= 0 // because result length of sort_batch with limit cannot be @@ -448,6 +447,7 @@ mod tests { use crate::collect; use crate::expressions::col; + use crate::expressions::PhysicalSortExpr; use crate::memory::MemoryExec; use crate::sorts::sort::SortExec; use crate::test; @@ -475,7 +475,7 @@ mod tests { }; let partial_sort_exec = Arc::new(PartialSortExec::new( - vec![ + LexOrdering::new(vec![ PhysicalSortExpr { expr: col("a", &schema)?, options: option_asc, @@ -488,7 +488,7 @@ mod tests { expr: col("c", &schema)?, options: option_asc, }, - ], + ]), Arc::clone(&source), 2, )) as Arc; @@ -539,7 +539,7 @@ mod tests { for common_prefix_length in [1, 2] { let partial_sort_exec = Arc::new( PartialSortExec::new( - vec![ + LexOrdering::new(vec![ PhysicalSortExpr { expr: col("a", &schema)?, options: option_asc, @@ -552,7 +552,7 @@ mod tests { expr: col("c", &schema)?, options: option_asc, }, - ], + ]), Arc::clone(&source), common_prefix_length, ) @@ -611,7 +611,7 @@ mod tests { [(1, &source_tables[0]), (2, &source_tables[1])] { let partial_sort_exec = Arc::new(PartialSortExec::new( - vec![ + LexOrdering::new(vec![ PhysicalSortExpr { expr: col("a", &schema)?, options: option_asc, @@ -624,7 +624,7 @@ mod tests { expr: col("c", &schema)?, options: 
option_asc, }, - ], + ]), Arc::clone(source), common_prefix_length, )); @@ -701,7 +701,7 @@ mod tests { }; let schema = mem_exec.schema(); let partial_sort_executor = PartialSortExec::new( - vec![ + LexOrdering::new(vec![ PhysicalSortExpr { expr: col("a", &schema)?, options: option_asc, @@ -714,7 +714,7 @@ mod tests { expr: col("c", &schema)?, options: option_asc, }, - ], + ]), Arc::clone(&mem_exec), 1, ); @@ -762,7 +762,7 @@ mod tests { (Some(250), vec![0, 125, 125]), ] { let partial_sort_executor = PartialSortExec::new( - vec![ + LexOrdering::new(vec![ PhysicalSortExpr { expr: col("a", &schema)?, options: option_asc, @@ -775,7 +775,7 @@ mod tests { expr: col("c", &schema)?, options: option_asc, }, - ], + ]), Arc::clone(&mem_exec), 1, ) @@ -834,10 +834,10 @@ mod tests { )?); let partial_sort_exec = Arc::new(PartialSortExec::new( - vec![PhysicalSortExpr { + LexOrdering::new(vec![PhysicalSortExpr { expr: col("field_name", &schema)?, options: SortOptions::default(), - }], + }]), input, 1, )); @@ -923,7 +923,7 @@ mod tests { )?; let partial_sort_exec = Arc::new(PartialSortExec::new( - vec![ + LexOrdering::new(vec![ PhysicalSortExpr { expr: col("a", &schema)?, options: option_asc, @@ -936,7 +936,7 @@ mod tests { expr: col("c", &schema)?, options: option_desc, }, - ], + ]), Arc::new(MemoryExec::try_new(&[vec![batch]], schema, None)?), 2, )); @@ -1000,10 +1000,10 @@ mod tests { let blocking_exec = Arc::new(BlockingExec::new(Arc::clone(&schema), 1)); let refs = blocking_exec.refs(); let sort_exec = Arc::new(PartialSortExec::new( - vec![PhysicalSortExpr { + LexOrdering::new(vec![PhysicalSortExpr { expr: col("a", &schema)?, options: SortOptions::default(), - }], + }]), blocking_exec, 1, )); diff --git a/datafusion/physical-plan/src/sorts/sort.rs b/datafusion/physical-plan/src/sorts/sort.rs index 921678a4ad92..d90d0f64ceb4 100644 --- a/datafusion/physical-plan/src/sorts/sort.rs +++ b/datafusion/physical-plan/src/sorts/sort.rs @@ -52,7 +52,9 @@ use 
datafusion_execution::memory_pool::{MemoryConsumer, MemoryReservation}; use datafusion_execution::runtime_env::RuntimeEnv; use datafusion_execution::TaskContext; use datafusion_physical_expr::LexOrdering; -use datafusion_physical_expr_common::sort_expr::PhysicalSortRequirement; +use datafusion_physical_expr_common::sort_expr::{ + LexOrderingRef, PhysicalSortRequirement, +}; use crate::execution_plan::CardinalityEffect; use futures::{StreamExt, TryStreamExt}; @@ -243,7 +245,7 @@ impl ExternalSorter { pub fn new( partition_id: usize, schema: SchemaRef, - expr: Vec, + expr: LexOrdering, batch_size: usize, fetch: Option, sort_spill_reservation_bytes: usize, @@ -265,7 +267,7 @@ impl ExternalSorter { in_mem_batches: vec![], in_mem_batches_sorted: true, spills: vec![], - expr: expr.into(), + expr: expr.inner.into(), metrics, fetch, reservation, @@ -345,7 +347,7 @@ impl ExternalSorter { StreamingMergeBuilder::new() .with_streams(streams) .with_schema(Arc::clone(&self.schema)) - .with_expressions(&self.expr) + .with_expressions(self.expr.to_vec().as_slice()) .with_metrics(self.metrics.baseline.clone()) .with_batch_size(self.batch_size) .with_fetch(self.fetch) @@ -537,7 +539,7 @@ impl ExternalSorter { StreamingMergeBuilder::new() .with_streams(streams) .with_schema(Arc::clone(&self.schema)) - .with_expressions(&self.expr) + .with_expressions(self.expr.as_ref()) .with_metrics(metrics) .with_batch_size(self.batch_size) .with_fetch(self.fetch) @@ -601,7 +603,7 @@ impl Debug for ExternalSorter { pub fn sort_batch( batch: &RecordBatch, - expressions: &[PhysicalSortExpr], + expressions: LexOrderingRef, fetch: Option, ) -> Result { let sort_columns = expressions @@ -673,12 +675,12 @@ pub(crate) fn lexsort_to_indices_multi_columns( /// /// Support sorting datasets that are larger than the memory allotted /// by the memory manager, by spilling to disk. 
-#[derive(Debug)] +#[derive(Debug, Clone)] pub struct SortExec { /// Input schema pub(crate) input: Arc, /// Sort expressions - expr: Vec, + expr: LexOrdering, /// Containing all metrics set created during sort metrics_set: ExecutionPlanMetricsSet, /// Preserve partitions of input plan. If false, the input partitions @@ -693,7 +695,7 @@ pub struct SortExec { impl SortExec { /// Create a new sort execution plan that produces a single, /// sorted output partition. - pub fn new(expr: Vec, input: Arc) -> Self { + pub fn new(expr: LexOrdering, input: Arc) -> Self { let preserve_partitioning = false; let cache = Self::compute_properties(&input, expr.clone(), preserve_partitioning); Self { @@ -760,8 +762,8 @@ impl SortExec { } /// Sort expressions - pub fn expr(&self) -> &[PhysicalSortExpr] { - &self.expr + pub fn expr(&self) -> LexOrderingRef { + self.expr.as_ref() } /// If `Some(fetch)`, limits output to only the first "fetch" items @@ -818,13 +820,12 @@ impl DisplayAs for SortExec { fn fmt_as(&self, t: DisplayFormatType, f: &mut Formatter) -> fmt::Result { match t { DisplayFormatType::Default | DisplayFormatType::Verbose => { - let expr = PhysicalSortExpr::format_list(&self.expr); let preserve_partitioning = self.preserve_partitioning; match self.fetch { Some(fetch) => { - write!(f, "SortExec: TopK(fetch={fetch}), expr=[{expr}], preserve_partitioning=[{preserve_partitioning}]",) + write!(f, "SortExec: TopK(fetch={fetch}), expr=[{}], preserve_partitioning=[{preserve_partitioning}]", self.expr) } - None => write!(f, "SortExec: expr=[{expr}], preserve_partitioning=[{preserve_partitioning}]"), + None => write!(f, "SortExec: expr=[{}], preserve_partitioning=[{preserve_partitioning}]", self.expr), } } } @@ -1027,9 +1028,9 @@ mod tests { impl SortedUnboundedExec { fn compute_properties(schema: SchemaRef) -> PlanProperties { let mut eq_properties = EquivalenceProperties::new(schema); - eq_properties.add_new_orderings(vec![vec![PhysicalSortExpr::new_default( - 
Arc::new(Column::new("c1", 0)), - )]]); + eq_properties.add_new_orderings(vec![LexOrdering::new(vec![ + PhysicalSortExpr::new_default(Arc::new(Column::new("c1", 0))), + ])]); let mode = ExecutionMode::Unbounded; PlanProperties::new(eq_properties, Partitioning::UnknownPartitioning(1), mode) } @@ -1123,10 +1124,10 @@ mod tests { let schema = csv.schema(); let sort_exec = Arc::new(SortExec::new( - vec![PhysicalSortExpr { + LexOrdering::new(vec![PhysicalSortExpr { expr: col("i", &schema)?, options: SortOptions::default(), - }], + }]), Arc::new(CoalescePartitionsExec::new(csv)), )); @@ -1166,10 +1167,10 @@ mod tests { let schema = input.schema(); let sort_exec = Arc::new(SortExec::new( - vec![PhysicalSortExpr { + LexOrdering::new(vec![PhysicalSortExpr { expr: col("i", &schema)?, options: SortOptions::default(), - }], + }]), Arc::new(CoalescePartitionsExec::new(input)), )); @@ -1245,10 +1246,10 @@ mod tests { let sort_exec = Arc::new( SortExec::new( - vec![PhysicalSortExpr { + LexOrdering::new(vec![PhysicalSortExpr { expr: col("i", &schema)?, options: SortOptions::default(), - }], + }]), Arc::new(CoalescePartitionsExec::new(csv)), ) .with_fetch(fetch), @@ -1294,10 +1295,10 @@ mod tests { ); let sort_exec = Arc::new(SortExec::new( - vec![PhysicalSortExpr { + LexOrdering::new(vec![PhysicalSortExpr { expr: col("field_name", &schema)?, options: SortOptions::default(), - }], + }]), input, )); @@ -1345,7 +1346,7 @@ mod tests { )?; let sort_exec = Arc::new(SortExec::new( - vec![ + LexOrdering::new(vec![ PhysicalSortExpr { expr: col("a", &schema)?, options: SortOptions { @@ -1360,7 +1361,7 @@ mod tests { nulls_first: false, }, }, - ], + ]), Arc::new(MemoryExec::try_new( &[vec![batch]], Arc::clone(&schema), @@ -1435,7 +1436,7 @@ mod tests { )?; let sort_exec = Arc::new(SortExec::new( - vec![ + LexOrdering::new(vec![ PhysicalSortExpr { expr: col("a", &schema)?, options: SortOptions { @@ -1450,7 +1451,7 @@ mod tests { nulls_first: false, }, }, - ], + ]), 
Arc::new(MemoryExec::try_new(&[vec![batch]], schema, None)?), )); @@ -1514,10 +1515,10 @@ mod tests { let blocking_exec = Arc::new(BlockingExec::new(Arc::clone(&schema), 1)); let refs = blocking_exec.refs(); let sort_exec = Arc::new(SortExec::new( - vec![PhysicalSortExpr { + LexOrdering::new(vec![PhysicalSortExpr { expr: col("a", &schema)?, options: SortOptions::default(), - }], + }]), blocking_exec, )); @@ -1545,12 +1546,12 @@ mod tests { RecordBatch::try_new_with_options(Arc::clone(&schema), vec![], &options) .unwrap(); - let expressions = vec![PhysicalSortExpr { + let expressions = LexOrdering::new(vec![PhysicalSortExpr { expr: Arc::new(Literal::new(ScalarValue::Int64(Some(1)))), options: SortOptions::default(), - }]; + }]); - let result = sort_batch(&batch, &expressions, None).unwrap(); + let result = sort_batch(&batch, expressions.as_ref(), None).unwrap(); assert_eq!(result.num_rows(), 1); } @@ -1564,9 +1565,9 @@ mod tests { cache: SortedUnboundedExec::compute_properties(Arc::new(schema.clone())), }; let mut plan = SortExec::new( - vec![PhysicalSortExpr::new_default(Arc::new(Column::new( + LexOrdering::new(vec![PhysicalSortExpr::new_default(Arc::new(Column::new( "c1", 0, - )))], + )))]), Arc::new(source), ); plan = plan.with_fetch(Some(9)); diff --git a/datafusion/physical-plan/src/sorts/sort_preserving_merge.rs b/datafusion/physical-plan/src/sorts/sort_preserving_merge.rs index 31a4ed61cf9e..9ee0faaa0a44 100644 --- a/datafusion/physical-plan/src/sorts/sort_preserving_merge.rs +++ b/datafusion/physical-plan/src/sorts/sort_preserving_merge.rs @@ -21,7 +21,6 @@ use std::any::Any; use std::sync::Arc; use crate::common::spawn_buffered; -use crate::expressions::PhysicalSortExpr; use crate::limit::LimitStream; use crate::metrics::{BaselineMetrics, ExecutionPlanMetricsSet, MetricsSet}; use crate::sorts::streaming_merge::StreamingMergeBuilder; @@ -35,7 +34,9 @@ use datafusion_execution::memory_pool::MemoryConsumer; use datafusion_execution::TaskContext; use 
datafusion_physical_expr::PhysicalSortRequirement; -use datafusion_physical_expr_common::sort_expr::LexRequirement; +use datafusion_physical_expr_common::sort_expr::{ + LexOrdering, LexOrderingRef, LexRequirement, +}; use log::{debug, trace}; /// Sort preserving merge execution plan @@ -70,23 +71,25 @@ use log::{debug, trace}; /// /// If any of the input partitions return an error, the error is propagated to /// the output and inputs are not polled again. -#[derive(Debug)] +#[derive(Debug, Clone)] pub struct SortPreservingMergeExec { /// Input plan input: Arc, /// Sort expressions - expr: Vec, + expr: LexOrdering, /// Execution metrics metrics: ExecutionPlanMetricsSet, /// Optional number of rows to fetch. Stops producing rows after this fetch fetch: Option, /// Cache holding plan properties like equivalences, output partitioning etc. cache: PlanProperties, + /// Configuration parameter to enable round-robin selection of tied winners of loser tree. + enable_round_robin_repartition: bool, } impl SortPreservingMergeExec { /// Create a new sort execution plan - pub fn new(expr: Vec, input: Arc) -> Self { + pub fn new(expr: LexOrdering, input: Arc) -> Self { let cache = Self::compute_properties(&input, expr.clone()); Self { input, @@ -94,21 +97,32 @@ impl SortPreservingMergeExec { metrics: ExecutionPlanMetricsSet::new(), fetch: None, cache, + enable_round_robin_repartition: true, } } + /// Sets the number of rows to fetch pub fn with_fetch(mut self, fetch: Option) -> Self { self.fetch = fetch; self } + /// Sets the selection strategy of tied winners of the loser tree algorithm + pub fn with_round_robin_repartition( + mut self, + enable_round_robin_repartition: bool, + ) -> Self { + self.enable_round_robin_repartition = enable_round_robin_repartition; + self + } + /// Input schema pub fn input(&self) -> &Arc { &self.input } /// Sort expressions - pub fn expr(&self) -> &[PhysicalSortExpr] { + pub fn expr(&self) -> LexOrderingRef { &self.expr } @@ -120,7 +134,7 @@ impl 
SortPreservingMergeExec { /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. fn compute_properties( input: &Arc, - ordering: Vec, + ordering: LexOrdering, ) -> PlanProperties { let mut eq_properties = input.equivalence_properties().clone(); eq_properties.clear_per_partition_constants(); @@ -141,11 +155,7 @@ impl DisplayAs for SortPreservingMergeExec { ) -> std::fmt::Result { match t { DisplayFormatType::Default | DisplayFormatType::Verbose => { - write!( - f, - "SortPreservingMergeExec: [{}]", - PhysicalSortExpr::format_list(&self.expr) - )?; + write!(f, "SortPreservingMergeExec: [{}]", self.expr)?; if let Some(fetch) = self.fetch { write!(f, ", fetch={fetch}")?; }; @@ -182,6 +192,7 @@ impl ExecutionPlan for SortPreservingMergeExec { metrics: self.metrics.clone(), fetch: limit, cache: self.cache.clone(), + enable_round_robin_repartition: true, })) } @@ -194,7 +205,9 @@ impl ExecutionPlan for SortPreservingMergeExec { } fn required_input_ordering(&self) -> Vec> { - vec![Some(PhysicalSortRequirement::from_sort_exprs(&self.expr))] + vec![Some(PhysicalSortRequirement::from_sort_exprs( + self.expr.iter(), + ))] } fn maintains_input_order(&self) -> Vec { @@ -276,11 +289,12 @@ impl ExecutionPlan for SortPreservingMergeExec { let result = StreamingMergeBuilder::new() .with_streams(receivers) .with_schema(schema) - .with_expressions(&self.expr) + .with_expressions(self.expr.as_ref()) .with_metrics(BaselineMetrics::new(&self.metrics, partition)) .with_batch_size(context.session_config().batch_size()) .with_fetch(self.fetch) .with_reservation(reservation) + .with_round_robin_tie_breaker(self.enable_round_robin_repartition) .build()?; debug!("Got stream result from SortPreservingMergeStream::new_from_receivers"); @@ -312,10 +326,12 @@ mod tests { use std::time::Duration; use super::*; + use crate::coalesce_batches::CoalesceBatchesExec; use 
crate::coalesce_partitions::CoalescePartitionsExec; use crate::expressions::col; use crate::memory::MemoryExec; use crate::metrics::{MetricValue, Timestamp}; + use crate::repartition::RepartitionExec; use crate::sorts::sort::SortExec; use crate::stream::RecordBatchReceiverStream; use crate::test::exec::{assert_strong_count_converges_to_zero, BlockingExec}; @@ -326,18 +342,95 @@ mod tests { use arrow::compute::SortOptions; use arrow::datatypes::{DataType, Field, Schema}; use arrow::record_batch::RecordBatch; + use arrow_array::Int64Array; use arrow_schema::SchemaRef; use datafusion_common::{assert_batches_eq, assert_contains, DataFusionError}; use datafusion_common_runtime::SpawnedTask; use datafusion_execution::config::SessionConfig; + use datafusion_execution::runtime_env::RuntimeEnvBuilder; use datafusion_execution::RecordBatchStream; use datafusion_physical_expr::expressions::Column; use datafusion_physical_expr::EquivalenceProperties; use datafusion_physical_expr_common::physical_expr::PhysicalExpr; + use datafusion_physical_expr_common::sort_expr::PhysicalSortExpr; use futures::{FutureExt, Stream, StreamExt}; use tokio::time::timeout; + // The number in the function is highly related to the memory limit we are testing + // any change of the constant should be aware of + fn generate_task_ctx_for_round_robin_tie_breaker() -> Result> { + let runtime = RuntimeEnvBuilder::new() + .with_memory_limit(20_000_000, 1.0) + .build_arc()?; + let config = SessionConfig::new(); + let task_ctx = TaskContext::default() + .with_runtime(runtime) + .with_session_config(config); + Ok(Arc::new(task_ctx)) + } + // The number in the function is highly related to the memory limit we are testing, + // any change of the constant should be aware of + fn generate_spm_for_round_robin_tie_breaker( + enable_round_robin_repartition: bool, + ) -> Result> { + let target_batch_size = 12500; + let row_size = 12500; + let a: ArrayRef = Arc::new(Int32Array::from(vec![1; row_size])); + let b: 
ArrayRef = Arc::new(StringArray::from_iter(vec![Some("a"); row_size])); + let c: ArrayRef = Arc::new(Int64Array::from_iter(vec![0; row_size])); + let rb = RecordBatch::try_from_iter(vec![("a", a), ("b", b), ("c", c)]).unwrap(); + + let rbs = (0..1024).map(|_| rb.clone()).collect::>(); + + let schema = rb.schema(); + let sort = LexOrdering::new(vec![ + PhysicalSortExpr { + expr: col("b", &schema).unwrap(), + options: Default::default(), + }, + PhysicalSortExpr { + expr: col("c", &schema).unwrap(), + options: Default::default(), + }, + ]); + + let exec = MemoryExec::try_new(&[rbs], schema, None).unwrap(); + let repartition_exec = + RepartitionExec::try_new(Arc::new(exec), Partitioning::RoundRobinBatch(2))?; + let coalesce_batches_exec = + CoalesceBatchesExec::new(Arc::new(repartition_exec), target_batch_size); + let spm = SortPreservingMergeExec::new(sort, Arc::new(coalesce_batches_exec)) + .with_round_robin_repartition(enable_round_robin_repartition); + Ok(Arc::new(spm)) + } + + /// This test verifies that memory usage stays within limits when the tie breaker is enabled. + /// Any errors here could indicate unintended changes in tie breaker logic. + /// + /// Note: If you adjust constants in this test, ensure that memory usage differs + /// based on whether the tie breaker is enabled or disabled. + #[tokio::test(flavor = "multi_thread")] + async fn test_round_robin_tie_breaker_success() -> Result<()> { + let task_ctx = generate_task_ctx_for_round_robin_tie_breaker()?; + let spm = generate_spm_for_round_robin_tie_breaker(true)?; + let _collected = collect(spm, task_ctx).await.unwrap(); + Ok(()) + } + + /// This test verifies that memory usage stays within limits when the tie breaker is enabled. + /// Any errors here could indicate unintended changes in tie breaker logic. + /// + /// Note: If you adjust constants in this test, ensure that memory usage differs + /// based on whether the tie breaker is enabled or disabled. 
+ #[tokio::test(flavor = "multi_thread")] + async fn test_round_robin_tie_breaker_fail() -> Result<()> { + let task_ctx = generate_task_ctx_for_round_robin_tie_breaker()?; + let spm = generate_spm_for_round_robin_tie_breaker(false)?; + let _err = collect(spm, task_ctx).await.unwrap_err(); + Ok(()) + } + #[tokio::test] async fn test_merge_interleave() { let task_ctx = Arc::new(TaskContext::default()); @@ -393,7 +486,7 @@ mod tests { let batch = RecordBatch::try_from_iter(vec![("a", a)]).unwrap(); let schema = batch.schema(); - let sort = vec![]; // no sort expressions + let sort = LexOrdering::default(); // no sort expressions let exec = MemoryExec::try_new(&[vec![batch.clone()], vec![batch]], schema, None) .unwrap(); let merge = Arc::new(SortPreservingMergeExec::new(sort, Arc::new(exec))); @@ -572,7 +665,7 @@ mod tests { context: Arc, ) { let schema = partitions[0][0].schema(); - let sort = vec![ + let sort = LexOrdering::new(vec![ PhysicalSortExpr { expr: col("b", &schema).unwrap(), options: Default::default(), @@ -581,7 +674,7 @@ mod tests { expr: col("c", &schema).unwrap(), options: Default::default(), }, - ]; + ]); let exec = MemoryExec::try_new(partitions, schema, None).unwrap(); let merge = Arc::new(SortPreservingMergeExec::new(sort, Arc::new(exec))); @@ -591,7 +684,7 @@ mod tests { async fn sorted_merge( input: Arc, - sort: Vec, + sort: LexOrdering, context: Arc, ) -> RecordBatch { let merge = Arc::new(SortPreservingMergeExec::new(sort, input)); @@ -602,7 +695,7 @@ mod tests { async fn partition_sort( input: Arc, - sort: Vec, + sort: LexOrdering, context: Arc, ) -> RecordBatch { let sort_exec = @@ -612,7 +705,7 @@ mod tests { async fn basic_sort( src: Arc, - sort: Vec, + sort: LexOrdering, context: Arc, ) -> RecordBatch { let merge = Arc::new(CoalescePartitionsExec::new(src)); @@ -629,13 +722,13 @@ mod tests { let csv = test::scan_partitioned(partitions); let schema = csv.schema(); - let sort = vec![PhysicalSortExpr { + let sort = 
LexOrdering::new(vec![PhysicalSortExpr { expr: col("i", &schema).unwrap(), options: SortOptions { descending: true, nulls_first: true, }, - }]; + }]); let basic = basic_sort(Arc::clone(&csv), sort.clone(), Arc::clone(&task_ctx)).await; @@ -680,7 +773,7 @@ mod tests { } async fn sorted_partitioned_input( - sort: Vec, + sort: LexOrdering, sizes: &[usize], context: Arc, ) -> Result> { @@ -699,10 +792,10 @@ mod tests { async fn test_partition_sort_streaming_input() -> Result<()> { let task_ctx = Arc::new(TaskContext::default()); let schema = make_partition(11).schema(); - let sort = vec![PhysicalSortExpr { + let sort = LexOrdering::new(vec![PhysicalSortExpr { expr: col("i", &schema).unwrap(), options: Default::default(), - }]; + }]); let input = sorted_partitioned_input(sort.clone(), &[10, 3, 11], Arc::clone(&task_ctx)) @@ -729,10 +822,10 @@ mod tests { #[tokio::test] async fn test_partition_sort_streaming_input_output() -> Result<()> { let schema = make_partition(11).schema(); - let sort = vec![PhysicalSortExpr { + let sort = LexOrdering::new(vec![PhysicalSortExpr { expr: col("i", &schema).unwrap(), options: Default::default(), - }]; + }]); // Test streaming with default batch size let task_ctx = Arc::new(TaskContext::default()); @@ -804,7 +897,7 @@ mod tests { let b2 = RecordBatch::try_from_iter(vec![("a", a), ("b", b), ("c", c)]).unwrap(); let schema = b1.schema(); - let sort = vec![ + let sort = LexOrdering::new(vec![ PhysicalSortExpr { expr: col("b", &schema).unwrap(), options: SortOptions { @@ -819,7 +912,7 @@ mod tests { nulls_first: false, }, }, - ]; + ]); let exec = MemoryExec::try_new(&[vec![b1], vec![b2]], schema, None).unwrap(); let merge = Arc::new(SortPreservingMergeExec::new(sort, Arc::new(exec))); @@ -855,13 +948,13 @@ mod tests { let batch = RecordBatch::try_from_iter(vec![("a", a), ("b", b)]).unwrap(); let schema = batch.schema(); - let sort = vec![PhysicalSortExpr { + let sort = LexOrdering::new(vec![PhysicalSortExpr { expr: col("b", 
&schema).unwrap(), options: SortOptions { descending: false, nulls_first: true, }, - }]; + }]); let exec = MemoryExec::try_new(&[vec![batch]], schema, None).unwrap(); let merge = Arc::new( SortPreservingMergeExec::new(sort, Arc::new(exec)).with_fetch(Some(2)), @@ -891,13 +984,13 @@ mod tests { let batch = RecordBatch::try_from_iter(vec![("a", a), ("b", b)]).unwrap(); let schema = batch.schema(); - let sort = vec![PhysicalSortExpr { + let sort = LexOrdering::new(vec![PhysicalSortExpr { expr: col("b", &schema).unwrap(), options: SortOptions { descending: false, nulls_first: true, }, - }]; + }]); let exec = MemoryExec::try_new(&[vec![batch]], schema, None).unwrap(); let merge = Arc::new(SortPreservingMergeExec::new(sort, Arc::new(exec))); @@ -924,10 +1017,10 @@ mod tests { async fn test_async() -> Result<()> { let task_ctx = Arc::new(TaskContext::default()); let schema = make_partition(11).schema(); - let sort = vec![PhysicalSortExpr { + let sort = LexOrdering::new(vec![PhysicalSortExpr { expr: col("i", &schema).unwrap(), options: SortOptions::default(), - }]; + }]); let batches = sorted_partitioned_input(sort.clone(), &[5, 7, 3], Arc::clone(&task_ctx)) @@ -963,7 +1056,7 @@ mod tests { let merge_stream = StreamingMergeBuilder::new() .with_streams(streams) .with_schema(batches.schema()) - .with_expressions(sort.as_slice()) + .with_expressions(sort.as_ref()) .with_metrics(BaselineMetrics::new(&metrics, 0)) .with_batch_size(task_ctx.session_config().batch_size()) .with_fetch(fetch) @@ -1003,10 +1096,10 @@ mod tests { let b2 = RecordBatch::try_from_iter(vec![("a", a), ("b", b)]).unwrap(); let schema = b1.schema(); - let sort = vec![PhysicalSortExpr { + let sort = LexOrdering::new(vec![PhysicalSortExpr { expr: col("b", &schema).unwrap(), options: Default::default(), - }]; + }]); let exec = MemoryExec::try_new(&[vec![b1], vec![b2]], schema, None).unwrap(); let merge = Arc::new(SortPreservingMergeExec::new(sort, Arc::new(exec))); @@ -1062,10 +1155,10 @@ mod tests { let 
blocking_exec = Arc::new(BlockingExec::new(Arc::clone(&schema), 2)); let refs = blocking_exec.refs(); let sort_preserving_merge_exec = Arc::new(SortPreservingMergeExec::new( - vec![PhysicalSortExpr { + LexOrdering::new(vec![PhysicalSortExpr { expr: col("a", &schema)?, options: SortOptions::default(), - }], + }]), blocking_exec, )); @@ -1110,13 +1203,13 @@ mod tests { let schema = partitions[0][0].schema(); - let sort = vec![PhysicalSortExpr { + let sort = LexOrdering::new(vec![PhysicalSortExpr { expr: col("value", &schema).unwrap(), options: SortOptions { descending: false, nulls_first: true, }, - }]; + }]); let exec = MemoryExec::try_new(&partitions, schema, None).unwrap(); let merge = Arc::new(SortPreservingMergeExec::new(sort, Arc::new(exec))); @@ -1179,7 +1272,7 @@ mod tests { eq_properties.add_new_orderings(vec![columns .iter() .map(|expr| PhysicalSortExpr::new_default(Arc::clone(expr))) - .collect::>()]); + .collect::()]); let mode = ExecutionMode::Unbounded; PlanProperties::new(eq_properties, Partitioning::Hash(columns, 3), mode) } @@ -1288,9 +1381,9 @@ mod tests { congestion_cleared: Arc::new(Mutex::new(false)), }; let spm = SortPreservingMergeExec::new( - vec![PhysicalSortExpr::new_default(Arc::new(Column::new( + LexOrdering::new(vec![PhysicalSortExpr::new_default(Arc::new(Column::new( "c1", 0, - )))], + )))]), Arc::new(source), ); let spm_task = SpawnedTask::spawn(collect(Arc::new(spm), task_ctx)); diff --git a/datafusion/physical-plan/src/sorts/stream.rs b/datafusion/physical-plan/src/sorts/stream.rs index c7924edfb1eb..70beb2c4a91b 100644 --- a/datafusion/physical-plan/src/sorts/stream.rs +++ b/datafusion/physical-plan/src/sorts/stream.rs @@ -24,6 +24,7 @@ use arrow::record_batch::RecordBatch; use arrow::row::{RowConverter, SortField}; use datafusion_common::Result; use datafusion_execution::memory_pool::MemoryReservation; +use datafusion_physical_expr_common::sort_expr::LexOrderingRef; use futures::stream::{Fuse, StreamExt}; use 
std::marker::PhantomData; use std::sync::Arc; @@ -92,7 +93,7 @@ pub struct RowCursorStream { impl RowCursorStream { pub fn try_new( schema: &Schema, - expressions: &[PhysicalSortExpr], + expressions: LexOrderingRef, streams: Vec, reservation: MemoryReservation, ) -> Result { diff --git a/datafusion/physical-plan/src/sorts/streaming_merge.rs b/datafusion/physical-plan/src/sorts/streaming_merge.rs index ad640d8e8470..bd74685eac94 100644 --- a/datafusion/physical-plan/src/sorts/streaming_merge.rs +++ b/datafusion/physical-plan/src/sorts/streaming_merge.rs @@ -23,11 +23,12 @@ use crate::sorts::{ merge::SortPreservingMergeStream, stream::{FieldCursorStream, RowCursorStream}, }; -use crate::{PhysicalSortExpr, SendableRecordBatchStream}; +use crate::SendableRecordBatchStream; use arrow::datatypes::{DataType, SchemaRef}; use arrow_array::*; use datafusion_common::{internal_err, Result}; use datafusion_execution::memory_pool::MemoryReservation; +use datafusion_physical_expr_common::sort_expr::LexOrderingRef; macro_rules! primitive_merge_helper { ($t:ty, $($v:ident),+) => { @@ -36,7 +37,7 @@ macro_rules! primitive_merge_helper { } macro_rules! merge_helper { - ($t:ty, $sort:ident, $streams:ident, $schema:ident, $tracking_metrics:ident, $batch_size:ident, $fetch:ident, $reservation:ident) => {{ + ($t:ty, $sort:ident, $streams:ident, $schema:ident, $tracking_metrics:ident, $batch_size:ident, $fetch:ident, $reservation:ident, $enable_round_robin_tie_breaker:ident) => {{ let streams = FieldCursorStream::<$t>::new($sort, $streams); return Ok(Box::pin(SortPreservingMergeStream::new( Box::new(streams), @@ -45,6 +46,7 @@ macro_rules! merge_helper { $batch_size, $fetch, $reservation, + $enable_round_robin_tie_breaker, ))); }}; } @@ -53,16 +55,20 @@ macro_rules! 
merge_helper { pub struct StreamingMergeBuilder<'a> { streams: Vec, schema: Option, - expressions: &'a [PhysicalSortExpr], + expressions: LexOrderingRef<'a>, metrics: Option, batch_size: Option, fetch: Option, reservation: Option, + enable_round_robin_tie_breaker: bool, } impl<'a> StreamingMergeBuilder<'a> { pub fn new() -> Self { - Self::default() + Self { + enable_round_robin_tie_breaker: true, + ..Default::default() + } } pub fn with_streams(mut self, streams: Vec) -> Self { @@ -75,7 +81,7 @@ impl<'a> StreamingMergeBuilder<'a> { self } - pub fn with_expressions(mut self, expressions: &'a [PhysicalSortExpr]) -> Self { + pub fn with_expressions(mut self, expressions: LexOrderingRef<'a>) -> Self { self.expressions = expressions; self } @@ -100,6 +106,14 @@ impl<'a> StreamingMergeBuilder<'a> { self } + pub fn with_round_robin_tie_breaker( + mut self, + enable_round_robin_tie_breaker: bool, + ) -> Self { + self.enable_round_robin_tie_breaker = enable_round_robin_tie_breaker; + self + } + pub fn build(self) -> Result { let Self { streams, @@ -109,6 +123,7 @@ impl<'a> StreamingMergeBuilder<'a> { reservation, fetch, expressions, + enable_round_robin_tie_breaker, } = self; // Early return if streams or expressions are empty @@ -141,11 +156,11 @@ impl<'a> StreamingMergeBuilder<'a> { let sort = expressions[0].clone(); let data_type = sort.expr.data_type(schema.as_ref())?; downcast_primitive! 
{ - data_type => (primitive_merge_helper, sort, streams, schema, metrics, batch_size, fetch, reservation), - DataType::Utf8 => merge_helper!(StringArray, sort, streams, schema, metrics, batch_size, fetch, reservation) - DataType::LargeUtf8 => merge_helper!(LargeStringArray, sort, streams, schema, metrics, batch_size, fetch, reservation) - DataType::Binary => merge_helper!(BinaryArray, sort, streams, schema, metrics, batch_size, fetch, reservation) - DataType::LargeBinary => merge_helper!(LargeBinaryArray, sort, streams, schema, metrics, batch_size, fetch, reservation) + data_type => (primitive_merge_helper, sort, streams, schema, metrics, batch_size, fetch, reservation, enable_round_robin_tie_breaker), + DataType::Utf8 => merge_helper!(StringArray, sort, streams, schema, metrics, batch_size, fetch, reservation, enable_round_robin_tie_breaker) + DataType::LargeUtf8 => merge_helper!(LargeStringArray, sort, streams, schema, metrics, batch_size, fetch, reservation, enable_round_robin_tie_breaker) + DataType::Binary => merge_helper!(BinaryArray, sort, streams, schema, metrics, batch_size, fetch, reservation, enable_round_robin_tie_breaker) + DataType::LargeBinary => merge_helper!(LargeBinaryArray, sort, streams, schema, metrics, batch_size, fetch, reservation, enable_round_robin_tie_breaker) _ => {} } } @@ -163,6 +178,7 @@ impl<'a> StreamingMergeBuilder<'a> { batch_size, fetch, reservation, + enable_round_robin_tie_breaker, ))) } } diff --git a/datafusion/physical-plan/src/streaming.rs b/datafusion/physical-plan/src/streaming.rs index cdb94af1fe8a..7ccef3248069 100644 --- a/datafusion/physical-plan/src/streaming.rs +++ b/datafusion/physical-plan/src/streaming.rs @@ -55,6 +55,7 @@ pub trait PartitionStream: Debug + Send + Sync { /// /// If your source can be represented as one or more [`PartitionStream`]s, you can /// use this struct to implement [`ExecutionPlan`]. 
+#[derive(Clone)] pub struct StreamingTableExec { partitions: Vec>, projection: Option>, diff --git a/datafusion/physical-plan/src/topk/mod.rs b/datafusion/physical-plan/src/topk/mod.rs index 9b46ad2ec7b1..14469ab6c0d9 100644 --- a/datafusion/physical-plan/src/topk/mod.rs +++ b/datafusion/physical-plan/src/topk/mod.rs @@ -24,6 +24,7 @@ use arrow::{ use std::mem::size_of; use std::{cmp::Ordering, collections::BinaryHeap, sync::Arc}; +use crate::{stream::RecordBatchStreamAdapter, SendableRecordBatchStream}; use arrow_array::{Array, ArrayRef, RecordBatch}; use arrow_schema::SchemaRef; use datafusion_common::Result; @@ -32,10 +33,9 @@ use datafusion_execution::{ runtime_env::RuntimeEnv, }; use datafusion_physical_expr::PhysicalSortExpr; +use datafusion_physical_expr_common::sort_expr::LexOrdering; use hashbrown::HashMap; -use crate::{stream::RecordBatchStreamAdapter, SendableRecordBatchStream}; - use super::metrics::{BaselineMetrics, Count, ExecutionPlanMetricsSet, MetricBuilder}; /// Global TopK @@ -101,7 +101,7 @@ impl TopK { pub fn try_new( partition_id: usize, schema: SchemaRef, - expr: Vec, + expr: LexOrdering, k: usize, batch_size: usize, runtime: Arc, @@ -111,7 +111,7 @@ impl TopK { let reservation = MemoryConsumer::new(format!("TopK[{partition_id}]")) .register(&runtime.memory_pool); - let expr: Arc<[PhysicalSortExpr]> = expr.into(); + let expr: Arc<[PhysicalSortExpr]> = expr.inner.into(); let sort_fields: Vec<_> = expr .iter() diff --git a/datafusion/physical-plan/src/union.rs b/datafusion/physical-plan/src/union.rs index 433dda870def..bd36753880eb 100644 --- a/datafusion/physical-plan/src/union.rs +++ b/datafusion/physical-plan/src/union.rs @@ -85,7 +85,7 @@ use tokio::macros::support::thread_rng_n; /// │Input 1 │ │Input 2 │ /// └─────────────────┘ └──────────────────┘ /// ``` -#[derive(Debug)] +#[derive(Debug, Clone)] pub struct UnionExec { /// Input execution plan inputs: Vec>, @@ -298,7 +298,7 @@ impl ExecutionPlan for UnionExec { /// | |-----------------+ 
/// +---------+ /// ``` -#[derive(Debug)] +#[derive(Debug, Clone)] pub struct InterleaveExec { /// Input execution plan inputs: Vec>, @@ -607,6 +607,7 @@ mod tests { use datafusion_common::ScalarValue; use datafusion_physical_expr::expressions::col; use datafusion_physical_expr::{PhysicalExpr, PhysicalSortExpr}; + use datafusion_physical_expr_common::sort_expr::LexOrdering; // Generate a schema which consists of 7 columns (a, b, c, d, e, f, g) fn create_test_schema() -> Result { @@ -625,14 +626,14 @@ mod tests { // Convert each tuple to PhysicalSortExpr fn convert_to_sort_exprs( in_data: &[(&Arc, SortOptions)], - ) -> Vec { + ) -> LexOrdering { in_data .iter() .map(|(expr, options)| PhysicalSortExpr { expr: Arc::clone(*expr), options: *options, }) - .collect::>() + .collect::() } #[tokio::test] diff --git a/datafusion/physical-plan/src/unnest.rs b/datafusion/physical-plan/src/unnest.rs index 3e312b7451be..b7b9f17eb1b6 100644 --- a/datafusion/physical-plan/src/unnest.rs +++ b/datafusion/physical-plan/src/unnest.rs @@ -56,7 +56,7 @@ use log::trace; /// Thus the original RecordBatch with dimension (n x m) may have new dimension (n' x m') /// /// See [`UnnestOptions`] for more details and an example. 
-#[derive(Debug)] +#[derive(Debug, Clone)] pub struct UnnestExec { /// Input execution plan input: Arc, diff --git a/datafusion/physical-plan/src/values.rs b/datafusion/physical-plan/src/values.rs index 991146d245a7..edadf98cb10c 100644 --- a/datafusion/physical-plan/src/values.rs +++ b/datafusion/physical-plan/src/values.rs @@ -36,7 +36,7 @@ use datafusion_execution::TaskContext; use datafusion_physical_expr::EquivalenceProperties; /// Execution plan for values list based relation (produces constant rows) -#[derive(Debug)] +#[derive(Debug, Clone)] pub struct ValuesExec { /// The schema schema: SchemaRef, diff --git a/datafusion/physical-plan/src/windows/bounded_window_agg_exec.rs b/datafusion/physical-plan/src/windows/bounded_window_agg_exec.rs index 6495657339fa..8c0331f94570 100644 --- a/datafusion/physical-plan/src/windows/bounded_window_agg_exec.rs +++ b/datafusion/physical-plan/src/windows/bounded_window_agg_exec.rs @@ -28,7 +28,6 @@ use std::sync::Arc; use std::task::{Context, Poll}; use super::utils::create_schema; -use crate::expressions::PhysicalSortExpr; use crate::metrics::{BaselineMetrics, ExecutionPlanMetricsSet, MetricsSet}; use crate::windows::{ calc_requirements, get_ordered_partition_by_indices, get_partition_by_sort_exprs, @@ -60,7 +59,7 @@ use datafusion_physical_expr::window::{ PartitionBatches, PartitionKey, PartitionWindowAggStates, WindowState, }; use datafusion_physical_expr::PhysicalExpr; -use datafusion_physical_expr_common::sort_expr::LexRequirement; +use datafusion_physical_expr_common::sort_expr::{LexOrdering, LexRequirement}; use futures::stream::Stream; use futures::{ready, StreamExt}; use hashbrown::raw::RawTable; @@ -68,7 +67,7 @@ use indexmap::IndexMap; use log::debug; /// Window execution plan -#[derive(Debug)] +#[derive(Debug, Clone)] pub struct BoundedWindowAggExec { /// Input plan input: Arc, @@ -149,7 +148,7 @@ impl BoundedWindowAggExec { // We are sure that partition by columns are always at the beginning of sort_keys // 
Hence returned `PhysicalSortExpr` corresponding to `PARTITION BY` columns can be used safely // to calculate partition separation points - pub fn partition_by_sort_keys(&self) -> Result> { + pub fn partition_by_sort_keys(&self) -> Result { let partition_by = self.window_expr()[0].partition_by(); get_partition_by_sort_exprs( &self.input, @@ -261,7 +260,7 @@ impl ExecutionPlan for BoundedWindowAggExec { .ordered_partition_by_indices .iter() .map(|idx| &partition_bys[*idx]); - vec![calc_requirements(partition_bys, order_keys)] + vec![calc_requirements(partition_bys, order_keys.iter())] } fn required_input_distribution(&self) -> Vec { @@ -707,7 +706,7 @@ impl LinearSearch { /// when computing partitions. pub struct SortedSearch { /// Stores partition by columns and their ordering information - partition_by_sort_keys: Vec, + partition_by_sort_keys: LexOrdering, /// Input ordering and partition by key ordering need not be the same, so /// this vector stores the mapping between them. For instance, if the input /// is ordered by a, b and the window expression contains a PARTITION BY b, a @@ -1160,6 +1159,7 @@ mod tests { use std::time::Duration; use crate::common::collect; + use crate::expressions::PhysicalSortExpr; use crate::memory::MemoryExec; use crate::projection::ProjectionExec; use crate::streaming::{PartitionStream, StreamingTableExec}; @@ -1184,8 +1184,9 @@ mod tests { use datafusion_physical_expr::window::{ BuiltInWindowExpr, BuiltInWindowFunctionExpr, }; - use datafusion_physical_expr::{LexOrdering, PhysicalExpr, PhysicalSortExpr}; + use datafusion_physical_expr::{LexOrdering, PhysicalExpr}; + use datafusion_physical_expr_common::sort_expr::LexOrderingRef; use futures::future::Shared; use futures::{pin_mut, ready, FutureExt, Stream, StreamExt}; use itertools::Itertools; @@ -1286,10 +1287,10 @@ mod tests { Arc::new(Column::new(schema.fields[0].name(), 0)) as Arc; let args = vec![col_expr]; let partitionby_exprs = vec![col(hash, &schema)?]; - let orderby_exprs = 
vec![PhysicalSortExpr { + let orderby_exprs = LexOrdering::new(vec![PhysicalSortExpr { expr: col(order_by, &schema)?, options: SortOptions::default(), - }]; + }]); let window_frame = WindowFrame::new_bounds( WindowFrameUnits::Range, WindowFrameBound::CurrentRow, @@ -1306,7 +1307,7 @@ mod tests { fn_name, &args, &partitionby_exprs, - &orderby_exprs, + orderby_exprs.as_ref(), Arc::new(window_frame), &input.schema(), false, @@ -1403,13 +1404,13 @@ mod tests { } fn schema_orders(schema: &SchemaRef) -> Result> { - let orderings = vec![vec![PhysicalSortExpr { + let orderings = vec![LexOrdering::new(vec![PhysicalSortExpr { expr: col("sn", schema)?, options: SortOptions { descending: false, nulls_first: false, }, - }]]; + }])]; Ok(orderings) } @@ -1552,7 +1553,7 @@ mod tests { Arc::new(BuiltInWindowExpr::new( last_value_func, &[], - &[], + LexOrderingRef::default(), Arc::new(WindowFrame::new_bounds( WindowFrameUnits::Rows, WindowFrameBound::Preceding(ScalarValue::UInt64(None)), @@ -1563,7 +1564,7 @@ mod tests { Arc::new(BuiltInWindowExpr::new( nth_value_func1, &[], - &[], + LexOrderingRef::default(), Arc::new(WindowFrame::new_bounds( WindowFrameUnits::Rows, WindowFrameBound::Preceding(ScalarValue::UInt64(None)), @@ -1574,7 +1575,7 @@ mod tests { Arc::new(BuiltInWindowExpr::new( nth_value_func2, &[], - &[], + LexOrderingRef::default(), Arc::new(WindowFrame::new_bounds( WindowFrameUnits::Rows, WindowFrameBound::Preceding(ScalarValue::UInt64(None)), @@ -1716,8 +1717,8 @@ mod tests { let plan = projection_exec(window)?; let expected_plan = vec![ - "ProjectionExec: expr=[sn@0 as sn, hash@1 as hash, count([Column { name: \"sn\", index: 0 }]) PARTITION BY: [[Column { name: \"hash\", index: 1 }]], ORDER BY: [[PhysicalSortExpr { expr: Column { name: \"sn\", index: 0 }, options: SortOptions { descending: false, nulls_first: true } }]]@2 as col_2]", - " BoundedWindowAggExec: wdw=[count([Column { name: \"sn\", index: 0 }]) PARTITION BY: [[Column { name: \"hash\", index: 1 }]], ORDER 
BY: [[PhysicalSortExpr { expr: Column { name: \"sn\", index: 0 }, options: SortOptions { descending: false, nulls_first: true } }]]: Ok(Field { name: \"count([Column { name: \\\"sn\\\", index: 0 }]) PARTITION BY: [[Column { name: \\\"hash\\\", index: 1 }]], ORDER BY: [[PhysicalSortExpr { expr: Column { name: \\\"sn\\\", index: 0 }, options: SortOptions { descending: false, nulls_first: true } }]]\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: CurrentRow, end_bound: Following(UInt64(1)), is_causal: false }], mode=[Linear]", + "ProjectionExec: expr=[sn@0 as sn, hash@1 as hash, count([Column { name: \"sn\", index: 0 }]) PARTITION BY: [[Column { name: \"hash\", index: 1 }]], ORDER BY: [LexOrdering { inner: [PhysicalSortExpr { expr: Column { name: \"sn\", index: 0 }, options: SortOptions { descending: false, nulls_first: true } }] }]@2 as col_2]", + " BoundedWindowAggExec: wdw=[count([Column { name: \"sn\", index: 0 }]) PARTITION BY: [[Column { name: \"hash\", index: 1 }]], ORDER BY: [LexOrdering { inner: [PhysicalSortExpr { expr: Column { name: \"sn\", index: 0 }, options: SortOptions { descending: false, nulls_first: true } }] }]: Ok(Field { name: \"count([Column { name: \\\"sn\\\", index: 0 }]) PARTITION BY: [[Column { name: \\\"hash\\\", index: 1 }]], ORDER BY: [LexOrdering { inner: [PhysicalSortExpr { expr: Column { name: \\\"sn\\\", index: 0 }, options: SortOptions { descending: false, nulls_first: true } }] }]\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: CurrentRow, end_bound: Following(UInt64(1)), is_causal: false }], mode=[Linear]", " StreamingTableExec: partition_sizes=1, projection=[sn, hash], infinite_source=true, output_ordering=[sn@0 ASC NULLS LAST]", ]; diff --git a/datafusion/physical-plan/src/windows/mod.rs b/datafusion/physical-plan/src/windows/mod.rs index 
7ebb7e71ec57..217823fb6a0a 100644 --- a/datafusion/physical-plan/src/windows/mod.rs +++ b/datafusion/physical-plan/src/windows/mod.rs @@ -53,7 +53,7 @@ use datafusion_physical_expr::expressions::Column; pub use datafusion_physical_expr::window::{ BuiltInWindowExpr, PlainAggregateWindowExpr, WindowExpr, }; -use datafusion_physical_expr_common::sort_expr::LexRequirement; +use datafusion_physical_expr_common::sort_expr::{LexOrderingRef, LexRequirement}; pub use window_agg_exec::WindowAggExec; /// Build field from window function and add it into schema @@ -98,7 +98,7 @@ pub fn create_window_expr( name: String, args: &[Arc], partition_by: &[Arc], - order_by: &[PhysicalSortExpr], + order_by: LexOrderingRef, window_frame: Arc, input_schema: &Schema, ignore_nulls: bool, @@ -139,7 +139,7 @@ pub fn create_window_expr( /// Creates an appropriate [`WindowExpr`] based on the window frame and fn window_expr_from_aggregate_expr( partition_by: &[Arc], - order_by: &[PhysicalSortExpr], + order_by: LexOrderingRef, window_frame: Arc, aggregate: Arc, ) -> Arc { @@ -497,7 +497,7 @@ pub fn get_best_fitting_window( /// the mode this window operator should work in to accommodate the existing ordering. pub fn get_window_mode( partitionby_exprs: &[Arc], - orderby_keys: &[PhysicalSortExpr], + orderby_keys: LexOrderingRef, input: &Arc, ) -> Option<(bool, InputOrderMode)> { let input_eqs = input.equivalence_properties().clone(); @@ -516,9 +516,9 @@ pub fn get_window_mode( // Treat partition by exprs as constant. During analysis of requirements are satisfied. 
let const_exprs = partitionby_exprs.iter().map(ConstExpr::from); let partition_by_eqs = input_eqs.with_constants(const_exprs); - let order_by_reqs = PhysicalSortRequirement::from_sort_exprs(orderby_keys); + let order_by_reqs = PhysicalSortRequirement::from_sort_exprs(orderby_keys.iter()); let reverse_order_by_reqs = - PhysicalSortRequirement::from_sort_exprs(&reverse_order_bys(orderby_keys)); + PhysicalSortRequirement::from_sort_exprs(reverse_order_bys(orderby_keys).iter()); for (should_swap, order_by_reqs) in [(false, order_by_reqs), (true, reverse_order_by_reqs)] { @@ -699,7 +699,7 @@ mod tests { "count".to_owned(), &[col("a", &schema)?], &[], - &[], + LexOrderingRef::default(), Arc::new(WindowFrame::new(None)), schema.as_ref(), false, @@ -896,7 +896,7 @@ mod tests { partition_by_exprs.push(col(col_name, &test_schema)?); } - let mut order_by_exprs = vec![]; + let mut order_by_exprs = LexOrdering::default(); for col_name in order_by_params { let expr = col(col_name, &test_schema)?; // Give default ordering, this is same with input ordering direction @@ -904,8 +904,11 @@ mod tests { let options = SortOptions::default(); order_by_exprs.push(PhysicalSortExpr { expr, options }); } - let res = - get_window_mode(&partition_by_exprs, &order_by_exprs, &exec_unbounded); + let res = get_window_mode( + &partition_by_exprs, + order_by_exprs.as_ref(), + &exec_unbounded, + ); // Since reversibility is not important in this test. 
Convert Option<(bool, InputOrderMode)> to Option let res = res.map(|(_, mode)| mode); assert_eq!( @@ -1058,7 +1061,7 @@ mod tests { partition_by_exprs.push(col(col_name, &test_schema)?); } - let mut order_by_exprs = vec![]; + let mut order_by_exprs = LexOrdering::default(); for (col_name, descending, nulls_first) in order_by_params { let expr = col(col_name, &test_schema)?; let options = SortOptions { @@ -1069,7 +1072,7 @@ mod tests { } assert_eq!( - get_window_mode(&partition_by_exprs, &order_by_exprs, &exec_unbounded), + get_window_mode(&partition_by_exprs, order_by_exprs.as_ref(), &exec_unbounded), *expected, "Unexpected result for in unbounded test case#: {case_idx:?}, case: {test_case:?}" ); diff --git a/datafusion/physical-plan/src/windows/window_agg_exec.rs b/datafusion/physical-plan/src/windows/window_agg_exec.rs index afe9700ed08c..f71a0b9fd095 100644 --- a/datafusion/physical-plan/src/windows/window_agg_exec.rs +++ b/datafusion/physical-plan/src/windows/window_agg_exec.rs @@ -23,7 +23,6 @@ use std::sync::Arc; use std::task::{Context, Poll}; use super::utils::create_schema; -use crate::expressions::PhysicalSortExpr; use crate::metrics::{BaselineMetrics, ExecutionPlanMetricsSet, MetricsSet}; use crate::windows::{ calc_requirements, get_ordered_partition_by_indices, get_partition_by_sort_exprs, @@ -43,11 +42,11 @@ use datafusion_common::stats::Precision; use datafusion_common::utils::{evaluate_partition_ranges, transpose}; use datafusion_common::{internal_err, Result}; use datafusion_execution::TaskContext; -use datafusion_physical_expr_common::sort_expr::LexRequirement; +use datafusion_physical_expr_common::sort_expr::{LexOrdering, LexRequirement}; use futures::{ready, Stream, StreamExt}; /// Window execution plan -#[derive(Debug)] +#[derive(Debug, Clone)] pub struct WindowAggExec { /// Input plan pub(crate) input: Arc, @@ -105,7 +104,7 @@ impl WindowAggExec { // We are sure that partition by columns are always at the beginning of sort_keys // Hence 
returned `PhysicalSortExpr` corresponding to `PARTITION BY` columns can be used safely // to calculate partition separation points - pub fn partition_by_sort_keys(&self) -> Result> { + pub fn partition_by_sort_keys(&self) -> Result { let partition_by = self.window_expr()[0].partition_by(); get_partition_by_sort_exprs( &self.input, @@ -195,13 +194,13 @@ impl ExecutionPlan for WindowAggExec { let partition_bys = self.window_expr()[0].partition_by(); let order_keys = self.window_expr()[0].order_by(); if self.ordered_partition_by_indices.len() < partition_bys.len() { - vec![calc_requirements(partition_bys, order_keys)] + vec![calc_requirements(partition_bys, order_keys.iter())] } else { let partition_bys = self .ordered_partition_by_indices .iter() .map(|idx| &partition_bys[*idx]); - vec![calc_requirements(partition_bys, order_keys)] + vec![calc_requirements(partition_bys, order_keys.iter())] } } @@ -282,7 +281,7 @@ pub struct WindowAggStream { batches: Vec, finished: bool, window_expr: Vec>, - partition_by_sort_keys: Vec, + partition_by_sort_keys: LexOrdering, baseline_metrics: BaselineMetrics, ordered_partition_by_indices: Vec, } @@ -294,7 +293,7 @@ impl WindowAggStream { window_expr: Vec>, input: SendableRecordBatchStream, baseline_metrics: BaselineMetrics, - partition_by_sort_keys: Vec, + partition_by_sort_keys: LexOrdering, ordered_partition_by_indices: Vec, ) -> Result { // In WindowAggExec all partition by columns should be ordered. 
diff --git a/datafusion/proto-common/proto/datafusion_common.proto b/datafusion/proto-common/proto/datafusion_common.proto index 7f8bce6b206e..65cd33d523cd 100644 --- a/datafusion/proto-common/proto/datafusion_common.proto +++ b/datafusion/proto-common/proto/datafusion_common.proto @@ -84,6 +84,7 @@ enum JoinType { LEFTANTI = 5; RIGHTSEMI = 6; RIGHTANTI = 7; + LEFTMARK = 8; } enum JoinConstraint { @@ -541,9 +542,10 @@ message ParquetOptions { string created_by = 16; } -enum JoinSide{ +enum JoinSide { LEFT_SIDE = 0; RIGHT_SIDE = 1; + NONE = 2; } message Precision{ diff --git a/datafusion/proto-common/src/from_proto/mod.rs b/datafusion/proto-common/src/from_proto/mod.rs index d848f795c684..a554e4ed2805 100644 --- a/datafusion/proto-common/src/from_proto/mod.rs +++ b/datafusion/proto-common/src/from_proto/mod.rs @@ -778,6 +778,7 @@ impl From for JoinSide { match t { protobuf::JoinSide::LeftSide => JoinSide::Left, protobuf::JoinSide::RightSide => JoinSide::Right, + protobuf::JoinSide::None => JoinSide::None, } } } diff --git a/datafusion/proto-common/src/generated/pbjson.rs b/datafusion/proto-common/src/generated/pbjson.rs index e8b46fbf7012..e8235ef7b9dd 100644 --- a/datafusion/proto-common/src/generated/pbjson.rs +++ b/datafusion/proto-common/src/generated/pbjson.rs @@ -3761,6 +3761,7 @@ impl serde::Serialize for JoinSide { let variant = match self { Self::LeftSide => "LEFT_SIDE", Self::RightSide => "RIGHT_SIDE", + Self::None => "NONE", }; serializer.serialize_str(variant) } @@ -3774,6 +3775,7 @@ impl<'de> serde::Deserialize<'de> for JoinSide { const FIELDS: &[&str] = &[ "LEFT_SIDE", "RIGHT_SIDE", + "NONE", ]; struct GeneratedVisitor; @@ -3816,6 +3818,7 @@ impl<'de> serde::Deserialize<'de> for JoinSide { match value { "LEFT_SIDE" => Ok(JoinSide::LeftSide), "RIGHT_SIDE" => Ok(JoinSide::RightSide), + "NONE" => Ok(JoinSide::None), _ => Err(serde::de::Error::unknown_variant(value, FIELDS)), } } @@ -3838,6 +3841,7 @@ impl serde::Serialize for JoinType { Self::Leftanti => 
"LEFTANTI", Self::Rightsemi => "RIGHTSEMI", Self::Rightanti => "RIGHTANTI", + Self::Leftmark => "LEFTMARK", }; serializer.serialize_str(variant) } @@ -3857,6 +3861,7 @@ impl<'de> serde::Deserialize<'de> for JoinType { "LEFTANTI", "RIGHTSEMI", "RIGHTANTI", + "LEFTMARK", ]; struct GeneratedVisitor; @@ -3905,6 +3910,7 @@ impl<'de> serde::Deserialize<'de> for JoinType { "LEFTANTI" => Ok(JoinType::Leftanti), "RIGHTSEMI" => Ok(JoinType::Rightsemi), "RIGHTANTI" => Ok(JoinType::Rightanti), + "LEFTMARK" => Ok(JoinType::Leftmark), _ => Err(serde::de::Error::unknown_variant(value, FIELDS)), } } diff --git a/datafusion/proto-common/src/generated/prost.rs b/datafusion/proto-common/src/generated/prost.rs index 939a4b3c2cd2..68e7f74c7f49 100644 --- a/datafusion/proto-common/src/generated/prost.rs +++ b/datafusion/proto-common/src/generated/prost.rs @@ -883,6 +883,7 @@ pub enum JoinType { Leftanti = 5, Rightsemi = 6, Rightanti = 7, + Leftmark = 8, } impl JoinType { /// String value of the enum field names used in the ProtoBuf definition. @@ -899,6 +900,7 @@ impl JoinType { Self::Leftanti => "LEFTANTI", Self::Rightsemi => "RIGHTSEMI", Self::Rightanti => "RIGHTANTI", + Self::Leftmark => "LEFTMARK", } } /// Creates an enum from field names used in the ProtoBuf definition. @@ -912,6 +914,7 @@ impl JoinType { "LEFTANTI" => Some(Self::Leftanti), "RIGHTSEMI" => Some(Self::Rightsemi), "RIGHTANTI" => Some(Self::Rightanti), + "LEFTMARK" => Some(Self::Leftmark), _ => None, } } @@ -1069,6 +1072,7 @@ impl CompressionTypeVariant { pub enum JoinSide { LeftSide = 0, RightSide = 1, + None = 2, } impl JoinSide { /// String value of the enum field names used in the ProtoBuf definition. @@ -1079,6 +1083,7 @@ impl JoinSide { match self { Self::LeftSide => "LEFT_SIDE", Self::RightSide => "RIGHT_SIDE", + Self::None => "NONE", } } /// Creates an enum from field names used in the ProtoBuf definition. 
@@ -1086,6 +1091,7 @@ impl JoinSide { match value { "LEFT_SIDE" => Some(Self::LeftSide), "RIGHT_SIDE" => Some(Self::RightSide), + "NONE" => Some(Self::None), _ => None, } } diff --git a/datafusion/proto-common/src/to_proto/mod.rs b/datafusion/proto-common/src/to_proto/mod.rs index f9b8973e2d41..02a642a4af93 100644 --- a/datafusion/proto-common/src/to_proto/mod.rs +++ b/datafusion/proto-common/src/to_proto/mod.rs @@ -759,6 +759,7 @@ impl From for protobuf::JoinSide { match t { JoinSide::Left => protobuf::JoinSide::LeftSide, JoinSide::Right => protobuf::JoinSide::RightSide, + JoinSide::None => protobuf::JoinSide::None, } } } diff --git a/datafusion/proto/src/generated/datafusion_proto_common.rs b/datafusion/proto/src/generated/datafusion_proto_common.rs index 939a4b3c2cd2..68e7f74c7f49 100644 --- a/datafusion/proto/src/generated/datafusion_proto_common.rs +++ b/datafusion/proto/src/generated/datafusion_proto_common.rs @@ -883,6 +883,7 @@ pub enum JoinType { Leftanti = 5, Rightsemi = 6, Rightanti = 7, + Leftmark = 8, } impl JoinType { /// String value of the enum field names used in the ProtoBuf definition. @@ -899,6 +900,7 @@ impl JoinType { Self::Leftanti => "LEFTANTI", Self::Rightsemi => "RIGHTSEMI", Self::Rightanti => "RIGHTANTI", + Self::Leftmark => "LEFTMARK", } } /// Creates an enum from field names used in the ProtoBuf definition. @@ -912,6 +914,7 @@ impl JoinType { "LEFTANTI" => Some(Self::Leftanti), "RIGHTSEMI" => Some(Self::Rightsemi), "RIGHTANTI" => Some(Self::Rightanti), + "LEFTMARK" => Some(Self::Leftmark), _ => None, } } @@ -1069,6 +1072,7 @@ impl CompressionTypeVariant { pub enum JoinSide { LeftSide = 0, RightSide = 1, + None = 2, } impl JoinSide { /// String value of the enum field names used in the ProtoBuf definition. @@ -1079,6 +1083,7 @@ impl JoinSide { match self { Self::LeftSide => "LEFT_SIDE", Self::RightSide => "RIGHT_SIDE", + Self::None => "NONE", } } /// Creates an enum from field names used in the ProtoBuf definition. 
@@ -1086,6 +1091,7 @@ impl JoinSide { match value { "LEFT_SIDE" => Some(Self::LeftSide), "RIGHT_SIDE" => Some(Self::RightSide), + "NONE" => Some(Self::None), _ => None, } } diff --git a/datafusion/proto/src/logical_plan/from_proto.rs b/datafusion/proto/src/logical_plan/from_proto.rs index 27bda7dd5ace..f25fb0bf2561 100644 --- a/datafusion/proto/src/logical_plan/from_proto.rs +++ b/datafusion/proto/src/logical_plan/from_proto.rs @@ -213,6 +213,7 @@ impl From for JoinType { protobuf::JoinType::Rightsemi => JoinType::RightSemi, protobuf::JoinType::Leftanti => JoinType::LeftAnti, protobuf::JoinType::Rightanti => JoinType::RightAnti, + protobuf::JoinType::Leftmark => JoinType::LeftMark, } } } diff --git a/datafusion/proto/src/logical_plan/mod.rs b/datafusion/proto/src/logical_plan/mod.rs index b90ae88aa74a..1993598f5cf7 100644 --- a/datafusion/proto/src/logical_plan/mod.rs +++ b/datafusion/proto/src/logical_plan/mod.rs @@ -1633,6 +1633,9 @@ impl AsLogicalPlan for LogicalPlanNode { LogicalPlan::RecursiveQuery(_) => Err(proto_error( "LogicalPlan serde is not yet implemented for RecursiveQuery", )), + LogicalPlan::Execute(_) => Err(proto_error( + "LogicalPlan serde is not yet implemented for Execute", + )), } } } diff --git a/datafusion/proto/src/logical_plan/to_proto.rs b/datafusion/proto/src/logical_plan/to_proto.rs index 5a6f3a32c668..8af7b19d9091 100644 --- a/datafusion/proto/src/logical_plan/to_proto.rs +++ b/datafusion/proto/src/logical_plan/to_proto.rs @@ -685,6 +685,7 @@ impl From for protobuf::JoinType { JoinType::RightSemi => protobuf::JoinType::Rightsemi, JoinType::LeftAnti => protobuf::JoinType::Leftanti, JoinType::RightAnti => protobuf::JoinType::Rightanti, + JoinType::LeftMark => protobuf::JoinType::Leftmark, } } } diff --git a/datafusion/proto/src/physical_plan/from_proto.rs b/datafusion/proto/src/physical_plan/from_proto.rs index 20ec5eeaeaf8..316166042fc4 100644 --- a/datafusion/proto/src/physical_plan/from_proto.rs +++ 
b/datafusion/proto/src/physical_plan/from_proto.rs @@ -35,7 +35,7 @@ use datafusion::datasource::object_store::ObjectStoreUrl; use datafusion::datasource::physical_plan::{FileScanConfig, FileSinkConfig}; use datafusion::execution::FunctionRegistry; use datafusion::logical_expr::WindowFunctionDefinition; -use datafusion::physical_expr::{PhysicalSortExpr, ScalarFunctionExpr}; +use datafusion::physical_expr::{LexOrdering, PhysicalSortExpr, ScalarFunctionExpr}; use datafusion::physical_plan::expressions::{ in_list, BinaryExpr, CaseExpr, CastExpr, Column, IsNotNullExpr, IsNullExpr, LikeExpr, Literal, NegativeExpr, NotExpr, TryCastExpr, @@ -99,13 +99,13 @@ pub fn parse_physical_sort_exprs( registry: &dyn FunctionRegistry, input_schema: &Schema, codec: &dyn PhysicalExtensionCodec, -) -> Result> { +) -> Result { proto .iter() .map(|sort_expr| { parse_physical_sort_expr(sort_expr, registry, input_schema, codec) }) - .collect::>>() + .collect::>() } /// Parses a physical window expr from a protobuf. 
@@ -175,7 +175,7 @@ pub fn parse_physical_window_expr( name, &window_node_expr, &partition_by, - &order_by, + order_by.as_ref(), Arc::new(window_frame), &extended_schema, false, diff --git a/datafusion/proto/src/physical_plan/mod.rs b/datafusion/proto/src/physical_plan/mod.rs index 326c7acab392..e84eae2b9082 100644 --- a/datafusion/proto/src/physical_plan/mod.rs +++ b/datafusion/proto/src/physical_plan/mod.rs @@ -35,7 +35,7 @@ use datafusion::datasource::physical_plan::{AvroExec, CsvExec}; use datafusion::execution::runtime_env::RuntimeEnv; use datafusion::execution::FunctionRegistry; use datafusion::physical_expr::aggregate::AggregateFunctionExpr; -use datafusion::physical_expr::{PhysicalExprRef, PhysicalSortRequirement}; +use datafusion::physical_expr::{LexOrdering, PhysicalExprRef, PhysicalSortRequirement}; use datafusion::physical_plan::aggregates::AggregateMode; use datafusion::physical_plan::aggregates::{AggregateExec, PhysicalGroupBy}; use datafusion::physical_plan::analyze::AnalyzeExec; @@ -501,8 +501,9 @@ impl AsExecutionPlan for protobuf::PhysicalPlanNode { ExprType::AggregateExpr(agg_node) => { let input_phy_expr: Vec> = agg_node.expr.iter() .map(|e| parse_physical_expr(e, registry, &physical_schema, extension_codec)).collect::>>()?; - let ordering_req: Vec = agg_node.ordering_req.iter() - .map(|e| parse_physical_sort_expr(e, registry, &physical_schema, extension_codec)).collect::>>()?; + let ordering_req: LexOrdering = agg_node.ordering_req.iter() + .map(|e| parse_physical_sort_expr(e, registry, &physical_schema, extension_codec)) + .collect::>()?; agg_node.aggregate_function.as_ref().map(|func| { match func { AggregateFunction::UserDefinedAggrFunction(udaf_name) => { @@ -874,7 +875,7 @@ impl AsExecutionPlan for protobuf::PhysicalPlanNode { ) } }) - .collect::, _>>()?; + .collect::>()?; let fetch = if sort.fetch < 0 { None } else { @@ -921,7 +922,7 @@ impl AsExecutionPlan for protobuf::PhysicalPlanNode { ) } }) - .collect::, _>>()?; + .collect::>()?; 
let fetch = if sort.fetch < 0 { None } else { @@ -1036,7 +1037,7 @@ impl AsExecutionPlan for protobuf::PhysicalPlanNode { &sink_schema, extension_codec, ) - .map(|item| PhysicalSortRequirement::from_sort_exprs(&item)) + .map(|item| PhysicalSortRequirement::from_sort_exprs(&item.inner)) }) .transpose()?; Ok(Arc::new(DataSinkExec::new( @@ -1066,7 +1067,7 @@ impl AsExecutionPlan for protobuf::PhysicalPlanNode { &sink_schema, extension_codec, ) - .map(|item| PhysicalSortRequirement::from_sort_exprs(&item)) + .map(|item| PhysicalSortRequirement::from_sort_exprs(&item.inner)) }) .transpose()?; Ok(Arc::new(DataSinkExec::new( @@ -1103,7 +1104,9 @@ impl AsExecutionPlan for protobuf::PhysicalPlanNode { &sink_schema, extension_codec, ) - .map(|item| PhysicalSortRequirement::from_sort_exprs(&item)) + .map(|item| { + PhysicalSortRequirement::from_sort_exprs(&item.inner) + }) }) .transpose()?; Ok(Arc::new(DataSinkExec::new( diff --git a/datafusion/proto/src/physical_plan/to_proto.rs b/datafusion/proto/src/physical_plan/to_proto.rs index 89a2403922e9..4bf7e353326e 100644 --- a/datafusion/proto/src/physical_plan/to_proto.rs +++ b/datafusion/proto/src/physical_plan/to_proto.rs @@ -21,7 +21,7 @@ use std::sync::Arc; #[cfg(feature = "parquet")] use datafusion::datasource::file_format::parquet::ParquetSink; use datafusion::physical_expr::window::{NthValueKind, SlidingAggregateWindowExpr}; -use datafusion::physical_expr::{PhysicalSortExpr, ScalarFunctionExpr}; +use datafusion::physical_expr::{LexOrdering, PhysicalSortExpr, ScalarFunctionExpr}; use datafusion::physical_plan::expressions::{ BinaryExpr, CaseExpr, CastExpr, Column, InListExpr, IsNotNullExpr, IsNullExpr, Literal, NegativeExpr, NotExpr, NthValue, TryCastExpr, @@ -52,7 +52,10 @@ pub fn serialize_physical_aggr_expr( codec: &dyn PhysicalExtensionCodec, ) -> Result { let expressions = serialize_physical_exprs(&aggr_expr.expressions(), codec)?; - let ordering_req = aggr_expr.order_bys().unwrap_or(&[]).to_vec(); + let ordering_req 
= match aggr_expr.order_bys() { + Some(order) => LexOrdering::from_ref(order), + None => LexOrdering::default(), + }; let ordering_req = serialize_physical_sort_exprs(ordering_req, codec)?; let name = aggr_expr.fun().name().to_string(); diff --git a/datafusion/proto/tests/cases/roundtrip_physical_plan.rs b/datafusion/proto/tests/cases/roundtrip_physical_plan.rs index 4a9bf6afb49e..1e078ee410c6 100644 --- a/datafusion/proto/tests/cases/roundtrip_physical_plan.rs +++ b/datafusion/proto/tests/cases/roundtrip_physical_plan.rs @@ -52,7 +52,8 @@ use datafusion::logical_expr::{create_udf, JoinType, Operator, Volatility}; use datafusion::physical_expr::expressions::Literal; use datafusion::physical_expr::window::SlidingAggregateWindowExpr; use datafusion::physical_expr::{ - LexRequirement, PhysicalSortRequirement, ScalarFunctionExpr, + LexOrdering, LexOrderingRef, LexRequirement, PhysicalSortRequirement, + ScalarFunctionExpr, }; use datafusion::physical_plan::aggregates::{ AggregateExec, AggregateMode, PhysicalGroupBy, @@ -307,7 +308,7 @@ fn roundtrip_window() -> Result<()> { .build() .map(Arc::new)?, &[], - &[], + LexOrderingRef::default(), Arc::new(WindowFrame::new(None)), )); @@ -327,7 +328,7 @@ fn roundtrip_window() -> Result<()> { let sliding_aggr_window_expr = Arc::new(SlidingAggregateWindowExpr::new( sum_expr, &[], - &[], + LexOrderingRef::default(), Arc::new(window_frame), )); @@ -459,13 +460,13 @@ fn rountrip_aggregate_with_sort() -> Result<()> { let groups: Vec<(Arc, String)> = vec![(col("a", &schema)?, "unused".to_string())]; - let sort_exprs = vec![PhysicalSortExpr { + let sort_exprs = LexOrdering::new(vec![PhysicalSortExpr { expr: col("b", &schema)?, options: SortOptions { descending: false, nulls_first: true, }, - }]; + }]); let aggregates = vec![ @@ -585,7 +586,7 @@ fn roundtrip_sort() -> Result<()> { let field_a = Field::new("a", DataType::Boolean, false); let field_b = Field::new("b", DataType::Int64, false); let schema = Arc::new(Schema::new(vec![field_a, 
field_b])); - let sort_exprs = vec![ + let sort_exprs = LexOrdering::new(vec![ PhysicalSortExpr { expr: col("a", &schema)?, options: SortOptions { @@ -600,7 +601,7 @@ fn roundtrip_sort() -> Result<()> { nulls_first: true, }, }, - ]; + ]); roundtrip_test(Arc::new(SortExec::new( sort_exprs, Arc::new(EmptyExec::new(schema)), @@ -612,7 +613,7 @@ fn roundtrip_sort_preserve_partitioning() -> Result<()> { let field_a = Field::new("a", DataType::Boolean, false); let field_b = Field::new("b", DataType::Int64, false); let schema = Arc::new(Schema::new(vec![field_a, field_b])); - let sort_exprs = vec![ + let sort_exprs = LexOrdering::new(vec![ PhysicalSortExpr { expr: col("a", &schema)?, options: SortOptions { @@ -627,7 +628,7 @@ fn roundtrip_sort_preserve_partitioning() -> Result<()> { nulls_first: true, }, }, - ]; + ]); roundtrip_test(Arc::new(SortExec::new( sort_exprs.clone(), @@ -1013,7 +1014,7 @@ fn roundtrip_scalar_udf_extension_codec() -> Result<()> { vec![Arc::new(PlainAggregateWindowExpr::new( aggr_expr.clone(), &[col("author", &schema)?], - &[], + LexOrderingRef::default(), Arc::new(WindowFrame::new(None)), ))], filter, @@ -1074,7 +1075,7 @@ fn roundtrip_aggregate_udf_extension_codec() -> Result<()> { vec![Arc::new(PlainAggregateWindowExpr::new( aggr_expr, &[col("author", &schema)?], - &[], + LexOrderingRef::default(), Arc::new(WindowFrame::new(None)), ))], filter, @@ -1298,17 +1299,17 @@ fn roundtrip_sym_hash_join() -> Result<()> { ] { for left_order in &[ None, - Some(vec![PhysicalSortExpr { + Some(LexOrdering::new(vec![PhysicalSortExpr { expr: Arc::new(Column::new("col", schema_left.index_of("col")?)), options: Default::default(), - }]), + }])), ] { for right_order in &[ None, - Some(vec![PhysicalSortExpr { + Some(LexOrdering::new(vec![PhysicalSortExpr { expr: Arc::new(Column::new("col", schema_right.index_of("col")?)), options: Default::default(), - }]), + }])), ] { roundtrip_test(Arc::new( datafusion::physical_plan::joins::SymmetricHashJoinExec::try_new( diff 
--git a/datafusion/sql/src/statement.rs b/datafusion/sql/src/statement.rs index abb9912b712a..00949aa13ae1 100644 --- a/datafusion/sql/src/statement.rs +++ b/datafusion/sql/src/statement.rs @@ -48,7 +48,7 @@ use datafusion_expr::{ CreateExternalTable as PlanCreateExternalTable, CreateFunction, CreateFunctionBody, CreateIndex as PlanCreateIndex, CreateMemoryTable, CreateView, DescribeTable, DmlStatement, DropCatalogSchema, DropFunction, DropTable, DropView, EmptyRelation, - Explain, Expr, ExprSchemable, Filter, LogicalPlan, LogicalPlanBuilder, + Execute, Explain, Expr, ExprSchemable, Filter, LogicalPlan, LogicalPlanBuilder, OperateFunctionArg, PlanType, Prepare, SetVariable, SortExpr, Statement as PlanStatement, ToStringifiedPlan, TransactionAccessMode, TransactionConclusion, TransactionEnd, TransactionIsolationLevel, TransactionStart, @@ -642,6 +642,30 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { input: Arc::new(plan), })) } + Statement::Execute { + name, + parameters, + using, + } => { + // `USING` is a MySQL-specific syntax and currently not supported. 
+ if !using.is_empty() { + return not_impl_err!( + "Execute statement with USING is not supported" + ); + } + + let empty_schema = DFSchema::empty(); + let parameters = parameters + .into_iter() + .map(|expr| self.sql_to_expr(expr, &empty_schema, planner_context)) + .collect::>>()?; + + Ok(LogicalPlan::Execute(Execute { + name: ident_to_string(&name), + parameters, + schema: DFSchemaRef::new(empty_schema), + })) + } Statement::ShowTables { extended, diff --git a/datafusion/sql/src/unparser/plan.rs b/datafusion/sql/src/unparser/plan.rs index 2c38a1d36c1e..8167ddacffb4 100644 --- a/datafusion/sql/src/unparser/plan.rs +++ b/datafusion/sql/src/unparser/plan.rs @@ -112,6 +112,7 @@ impl Unparser<'_> { | LogicalPlan::Analyze(_) | LogicalPlan::Extension(_) | LogicalPlan::Prepare(_) + | LogicalPlan::Execute(_) | LogicalPlan::Ddl(_) | LogicalPlan::Copy(_) | LogicalPlan::DescribeTable(_) @@ -552,7 +553,7 @@ impl Unparser<'_> { relation, global: false, join_operator: self - .join_operator_to_sql(join.join_type, join_constraint), + .join_operator_to_sql(join.join_type, join_constraint)?, }; let mut from = select.pop_from().unwrap(); from.push_join(ast_join); @@ -855,8 +856,8 @@ impl Unparser<'_> { &self, join_type: JoinType, constraint: ast::JoinConstraint, - ) -> ast::JoinOperator { - match join_type { + ) -> Result { + Ok(match join_type { JoinType::Inner => ast::JoinOperator::Inner(constraint), JoinType::Left => ast::JoinOperator::LeftOuter(constraint), JoinType::Right => ast::JoinOperator::RightOuter(constraint), @@ -865,7 +866,8 @@ impl Unparser<'_> { JoinType::LeftSemi => ast::JoinOperator::LeftSemi(constraint), JoinType::RightAnti => ast::JoinOperator::RightAnti(constraint), JoinType::RightSemi => ast::JoinOperator::RightSemi(constraint), - } + JoinType::LeftMark => unimplemented!("Unparsing of Left Mark join type"), + }) } /// Convert the components of a USING clause to the USING AST. 
Returns diff --git a/datafusion/sqllogictest/bin/sqllogictests.rs b/datafusion/sqllogictest/bin/sqllogictests.rs index 2479252a7b5b..c3e739d146c6 100644 --- a/datafusion/sqllogictest/bin/sqllogictests.rs +++ b/datafusion/sqllogictest/bin/sqllogictests.rs @@ -22,6 +22,7 @@ use std::path::{Path, PathBuf}; use clap::Parser; use datafusion_sqllogictest::{DataFusion, TestContext}; use futures::stream::StreamExt; +use itertools::Itertools; use log::info; use sqllogictest::strict_column_validator; @@ -39,6 +40,23 @@ pub fn main() -> Result<()> { .block_on(run_tests()) } +fn value_validator(actual: &[Vec], expected: &[String]) -> bool { + let expected = expected + .iter() + // Trailing whitespace on lines in an SLT file is typically stripped, but do not fail if it is not. + // If a particular test wants to cover trailing whitespace on a value, + // it should project an additional non-whitespace column on the right. + .map(|s| s.trim_end().to_owned()) + .collect::>(); + let actual = actual + .iter() + .map(|strs| strs.iter().join(" ")) + // Editors do not always preserve trailing whitespace, so `expected` may or may not include it + .map(|s| s.trim_end().to_owned()) + .collect::>(); + actual == expected +} + /// Sets up an empty directory at test_files/scratch/ /// creating it if needed and clearing any file contents if it exists /// This allows tests for inserting to external tables or copy to @@ -140,6 +158,7 @@ async fn run_test_file(test_file: TestFile) -> Result<()> { )) }); runner.with_column_validator(strict_column_validator); + runner.with_validator(value_validator); runner .run_file_async(path) .await @@ -158,6 +177,7 @@ async fn run_test_file_with_postgres(test_file: TestFile) -> Result<()> { let mut runner = sqllogictest::Runner::new(|| Postgres::connect(relative_path.clone())); runner.with_column_validator(strict_column_validator); + runner.with_validator(value_validator); runner .run_file_async(path) .await @@ -176,7 +196,6 @@ async fn run_complete_file(test_file: 
TestFile) -> Result<()> { path, relative_path, } = test_file; - use sqllogictest::default_validator; info!("Using complete mode to complete: {}", path.display()); @@ -196,7 +215,7 @@ async fn run_complete_file(test_file: TestFile) -> Result<()> { .update_test_file( path, col_separator, - default_validator, + value_validator, strict_column_validator, ) .await diff --git a/datafusion/sqllogictest/src/engines/datafusion_engine/normalize.rs b/datafusion/sqllogictest/src/engines/datafusion_engine/normalize.rs index a88ee9d08df0..8337d2e9a39c 100644 --- a/datafusion/sqllogictest/src/engines/datafusion_engine/normalize.rs +++ b/datafusion/sqllogictest/src/engines/datafusion_engine/normalize.rs @@ -16,7 +16,7 @@ // under the License. use crate::engines::output::DFColumnType; -use arrow::array::Array; +use arrow::array::{Array, AsArray}; use arrow::datatypes::Fields; use arrow::util::display::ArrayFormatter; use arrow::{array, array::ArrayRef, datatypes::DataType, record_batch::RecordBatch}; @@ -238,6 +238,11 @@ pub fn cell_to_string(col: &ArrayRef, row: usize) -> Result { col, row ))), + DataType::Dictionary(_, _) => { + let dict = col.as_any_dictionary(); + let key = dict.normalized_keys()[row]; + Ok(cell_to_string(dict.values(), key)?) 
+ } _ => { let f = ArrayFormatter::try_new(col.as_ref(), &DEFAULT_FORMAT_OPTIONS); Ok(f.unwrap().value(row).to_string()) diff --git a/datafusion/sqllogictest/test_files/aggregate.slt b/datafusion/sqllogictest/test_files/aggregate.slt index f03c3700ab9f..917e037682f2 100644 --- a/datafusion/sqllogictest/test_files/aggregate.slt +++ b/datafusion/sqllogictest/test_files/aggregate.slt @@ -146,7 +146,7 @@ physical_plan 01)AggregateExec: mode=Final, gby=[], aggr=[array_agg(agg_order.c1) ORDER BY [agg_order.c2 DESC NULLS FIRST, agg_order.c3 ASC NULLS LAST]] 02)--CoalescePartitionsExec 03)----AggregateExec: mode=Partial, gby=[], aggr=[array_agg(agg_order.c1) ORDER BY [agg_order.c2 DESC NULLS FIRST, agg_order.c3 ASC NULLS LAST]] -04)------SortExec: expr=[c2@1 DESC,c3@2 ASC NULLS LAST], preserve_partitioning=[true] +04)------SortExec: expr=[c2@1 DESC, c3@2 ASC NULLS LAST], preserve_partitioning=[true] 05)--------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 06)----------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/aggregate_agg_multi_order.csv]]}, projection=[c1, c2, c3], has_header=true @@ -6080,7 +6080,7 @@ ORDER BY k; statement ok CREATE TABLE t1(v1 int); -# issue: https://github.com/apache/datafusion/issues/12814 +# issue: https://github.com/apache/datafusion/issues/12814 statement error DataFusion error: Error during planning: Aggregate functions are not allowed in the WHERE clause. 
Consider using HAVING instead SELECT v1 FROM t1 WHERE ((count(v1) % 1) << 1) > 0; diff --git a/datafusion/sqllogictest/test_files/array.slt b/datafusion/sqllogictest/test_files/array.slt index bfdbfb1bcc5e..1e60699a1f65 100644 --- a/datafusion/sqllogictest/test_files/array.slt +++ b/datafusion/sqllogictest/test_files/array.slt @@ -6985,7 +6985,7 @@ select array_resize(column1, column2, column3) from arrays_values; [11, 12, 13, 14, 15, 16, 17, 18, , 20, 2, 2] [21, 22, 23, , 25, 26, 27, 28, 29, 30, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3] [31, 32, 33, 34, 35, , 37, 38, 39, 40, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4] -[5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5] +NULL [] [51, 52, , 54, 55, 56, 57, 58, 59, 60, , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , ] [61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7] @@ -6997,7 +6997,7 @@ select array_resize(arrow_cast(column1, 'LargeList(Int64)'), column2, column3) f [11, 12, 13, 14, 15, 16, 17, 18, , 20, 2, 2] [21, 22, 23, , 25, 26, 27, 28, 29, 30, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3] [31, 32, 33, 34, 35, , 37, 38, 39, 40, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4] -[5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5] +NULL [] [51, 52, , 54, 55, 56, 57, 58, 59, 60, , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , ] [61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7] @@ -7013,6 +7013,51 @@ select 
array_resize(arrow_cast([[1], [2], [3]], 'LargeList(List(Int64))'), 10, [ ---- [[1], [2], [3], [5], [5], [5], [5], [5], [5], [5]] +# array_resize null value +query ? +select array_resize(arrow_cast(NULL, 'List(Int8)'), 1); +---- +NULL + +statement ok +CREATE TABLE array_resize_values +AS VALUES + (make_array(1, NULL, 3, 4, 5, 6, 7, 8, 9, 10), 2, 1), + (make_array(11, 12, NULL, 14, 15, 16, 17, 18, 19, 20), 5, 2), + (make_array(21, 22, 23, 24, NULL, 26, 27, 28, 29, 30), 8, 3), + (make_array(31, 32, 33, 34, 35, 36, NULL, 38, 39, 40), 12, 4), + (NULL, 3, 0), + (make_array(41, 42, 43, 44, 45, 46, 47, 48, 49, 50), NULL, 6), + (make_array(51, 52, 53, 54, 55, NULL, 57, 58, 59, 60), 13, NULL), + (make_array(61, 62, 63, 64, 65, 66, 67, 68, 69, 70), 15, 7) +; + +# array_resize columnar test #1 +query ? +select array_resize(column1, column2, column3) from array_resize_values; +---- +[1, ] +[11, 12, , 14, 15] +[21, 22, 23, 24, , 26, 27, 28] +[31, 32, 33, 34, 35, 36, , 38, 39, 40, 4, 4] +NULL +[] +[51, 52, 53, 54, 55, , 57, 58, 59, 60, , , ] +[61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 7, 7, 7, 7, 7] + +# array_resize columnar test #2 +query ? +select array_resize(arrow_cast(column1, 'LargeList(Int64)'), column2, column3) from array_resize_values; +---- +[1, ] +[11, 12, , 14, 15] +[21, 22, 23, 24, , 26, 27, 28] +[31, 32, 33, 34, 35, 36, , 38, 39, 40, 4, 4] +NULL +[] +[51, 52, 53, 54, 55, , 57, 58, 59, 60, , , ] +[61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 7, 7, 7, 7, 7] + ## array_reverse query ?? 
select array_reverse(make_array(1, 2, 3)), array_reverse(make_array(1)); diff --git a/datafusion/sqllogictest/test_files/avro.slt b/datafusion/sqllogictest/test_files/avro.slt index f8ef81a8ba2b..8282331f995e 100644 --- a/datafusion/sqllogictest/test_files/avro.slt +++ b/datafusion/sqllogictest/test_files/avro.slt @@ -198,22 +198,22 @@ NULL query IT SELECT id, CAST(string_col AS varchar) FROM alltypes_plain_multi_files ---- -4 0 -5 1 -6 0 -7 1 -2 0 -3 1 -0 0 -1 1 -4 0 -5 1 -6 0 -7 1 -2 0 -3 1 -0 0 -1 1 +4 0 +5 1 +6 0 +7 1 +2 0 +3 1 +0 0 +1 1 +4 0 +5 1 +6 0 +7 1 +2 0 +3 1 +0 0 +1 1 # test avro nested records query ???? diff --git a/datafusion/sqllogictest/test_files/cte.slt b/datafusion/sqllogictest/test_files/cte.slt index 60569803322c..53ca8d81b9e4 100644 --- a/datafusion/sqllogictest/test_files/cte.slt +++ b/datafusion/sqllogictest/test_files/cte.slt @@ -158,7 +158,7 @@ logical_plan 07)--------Filter: balances.time < Int64(10) 08)----------TableScan: balances physical_plan -01)SortExec: expr=[time@0 ASC NULLS LAST,name@1 ASC NULLS LAST,account_balance@2 ASC NULLS LAST], preserve_partitioning=[false] +01)SortExec: expr=[time@0 ASC NULLS LAST, name@1 ASC NULLS LAST, account_balance@2 ASC NULLS LAST], preserve_partitioning=[false] 02)--RecursiveQueryExec: name=balances, is_distinct=false 03)----CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/recursive_cte/balance.csv]]}, projection=[time, name, account_balance], has_header=true 04)----CoalescePartitionsExec diff --git a/datafusion/sqllogictest/test_files/ddl.slt b/datafusion/sqllogictest/test_files/ddl.slt index 3205920d7110..4a0ba87bfa1a 100644 --- a/datafusion/sqllogictest/test_files/ddl.slt +++ b/datafusion/sqllogictest/test_files/ddl.slt @@ -804,4 +804,4 @@ query error DataFusion error: Schema error: No field named a\. EXPLAIN CREATE TABLE t(a int) AS VALUES (a + a); statement error DataFusion error: Schema error: No field named a\. 
-CREATE TABLE t(a int) AS SELECT x FROM (VALUES (a)) t(x) WHERE false; \ No newline at end of file +CREATE TABLE t(a int) AS SELECT x FROM (VALUES (a)) t(x) WHERE false; diff --git a/datafusion/sqllogictest/test_files/describe.slt b/datafusion/sqllogictest/test_files/describe.slt index 077e8e6474d1..e4cb30628eec 100644 --- a/datafusion/sqllogictest/test_files/describe.slt +++ b/datafusion/sqllogictest/test_files/describe.slt @@ -81,8 +81,8 @@ int_col Int32 YES bigint_col Int64 YES float_col Float32 YES double_col Float64 YES -date_string_col Utf8 YES -string_col Utf8 YES +date_string_col Utf8View YES +string_col Utf8View YES timestamp_col Timestamp(Nanosecond, None) YES year Int32 YES month Int32 YES diff --git a/datafusion/sqllogictest/test_files/dictionary.slt b/datafusion/sqllogictest/test_files/dictionary.slt index 176331f570b0..b6923fcc944d 100644 --- a/datafusion/sqllogictest/test_files/dictionary.slt +++ b/datafusion/sqllogictest/test_files/dictionary.slt @@ -444,3 +444,9 @@ physical_plan 01)CoalesceBatchesExec: target_batch_size=8192 02)--FilterExec: column2@1 = 1 03)----MemoryExec: partitions=1, partition_sizes=[1] + +# Window Functions +query I +select dense_rank() over (order by arrow_cast('abc', 'Dictionary(UInt16, Utf8)')); +---- +1 diff --git a/datafusion/sqllogictest/test_files/explain.slt b/datafusion/sqllogictest/test_files/explain.slt index 1340fd490e06..54658f36ca14 100644 --- a/datafusion/sqllogictest/test_files/explain.slt +++ b/datafusion/sqllogictest/test_files/explain.slt @@ -305,8 +305,8 @@ initial_physical_plan 01)GlobalLimitExec: skip=0, fetch=10, statistics=[Rows=Exact(8), Bytes=Absent, [(Col[0]:),(Col[1]:),(Col[2]:),(Col[3]:),(Col[4]:),(Col[5]:),(Col[6]:),(Col[7]:),(Col[8]:),(Col[9]:),(Col[10]:)]] 02)--ParquetExec: file_groups={1 group: [[WORKSPACE_ROOT/parquet-testing/data/alltypes_plain.parquet]]}, projection=[id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, 
timestamp_col], limit=10, statistics=[Rows=Exact(8), Bytes=Absent, [(Col[0]:),(Col[1]:),(Col[2]:),(Col[3]:),(Col[4]:),(Col[5]:),(Col[6]:),(Col[7]:),(Col[8]:),(Col[9]:),(Col[10]:)]] initial_physical_plan_with_schema -01)GlobalLimitExec: skip=0, fetch=10, schema=[id:Int32;N, bool_col:Boolean;N, tinyint_col:Int32;N, smallint_col:Int32;N, int_col:Int32;N, bigint_col:Int64;N, float_col:Float32;N, double_col:Float64;N, date_string_col:Binary;N, string_col:Binary;N, timestamp_col:Timestamp(Nanosecond, None);N] -02)--ParquetExec: file_groups={1 group: [[WORKSPACE_ROOT/parquet-testing/data/alltypes_plain.parquet]]}, projection=[id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col], limit=10, schema=[id:Int32;N, bool_col:Boolean;N, tinyint_col:Int32;N, smallint_col:Int32;N, int_col:Int32;N, bigint_col:Int64;N, float_col:Float32;N, double_col:Float64;N, date_string_col:Binary;N, string_col:Binary;N, timestamp_col:Timestamp(Nanosecond, None);N] +01)GlobalLimitExec: skip=0, fetch=10, schema=[id:Int32;N, bool_col:Boolean;N, tinyint_col:Int32;N, smallint_col:Int32;N, int_col:Int32;N, bigint_col:Int64;N, float_col:Float32;N, double_col:Float64;N, date_string_col:BinaryView;N, string_col:BinaryView;N, timestamp_col:Timestamp(Nanosecond, None);N] +02)--ParquetExec: file_groups={1 group: [[WORKSPACE_ROOT/parquet-testing/data/alltypes_plain.parquet]]}, projection=[id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col], limit=10, schema=[id:Int32;N, bool_col:Boolean;N, tinyint_col:Int32;N, smallint_col:Int32;N, int_col:Int32;N, bigint_col:Int64;N, float_col:Float32;N, double_col:Float64;N, date_string_col:BinaryView;N, string_col:BinaryView;N, timestamp_col:Timestamp(Nanosecond, None);N] physical_plan after OutputRequirements 01)OutputRequirementExec, statistics=[Rows=Exact(8), Bytes=Absent, 
[(Col[0]:),(Col[1]:),(Col[2]:),(Col[3]:),(Col[4]:),(Col[5]:),(Col[6]:),(Col[7]:),(Col[8]:),(Col[9]:),(Col[10]:)]] 02)--GlobalLimitExec: skip=0, fetch=10, statistics=[Rows=Exact(8), Bytes=Absent, [(Col[0]:),(Col[1]:),(Col[2]:),(Col[3]:),(Col[4]:),(Col[5]:),(Col[6]:),(Col[7]:),(Col[8]:),(Col[9]:),(Col[10]:)]] @@ -328,7 +328,7 @@ physical_plan after ProjectionPushdown SAME TEXT AS ABOVE physical_plan after LimitPushdown ParquetExec: file_groups={1 group: [[WORKSPACE_ROOT/parquet-testing/data/alltypes_plain.parquet]]}, projection=[id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col], limit=10, statistics=[Rows=Exact(8), Bytes=Absent, [(Col[0]:),(Col[1]:),(Col[2]:),(Col[3]:),(Col[4]:),(Col[5]:),(Col[6]:),(Col[7]:),(Col[8]:),(Col[9]:),(Col[10]:)]] physical_plan after SanityCheckPlan SAME TEXT AS ABOVE physical_plan ParquetExec: file_groups={1 group: [[WORKSPACE_ROOT/parquet-testing/data/alltypes_plain.parquet]]}, projection=[id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col], limit=10, statistics=[Rows=Exact(8), Bytes=Absent, [(Col[0]:),(Col[1]:),(Col[2]:),(Col[3]:),(Col[4]:),(Col[5]:),(Col[6]:),(Col[7]:),(Col[8]:),(Col[9]:),(Col[10]:)]] -physical_plan_with_schema ParquetExec: file_groups={1 group: [[WORKSPACE_ROOT/parquet-testing/data/alltypes_plain.parquet]]}, projection=[id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col], limit=10, schema=[id:Int32;N, bool_col:Boolean;N, tinyint_col:Int32;N, smallint_col:Int32;N, int_col:Int32;N, bigint_col:Int64;N, float_col:Float32;N, double_col:Float64;N, date_string_col:Binary;N, string_col:Binary;N, timestamp_col:Timestamp(Nanosecond, None);N] +physical_plan_with_schema ParquetExec: file_groups={1 group: [[WORKSPACE_ROOT/parquet-testing/data/alltypes_plain.parquet]]}, projection=[id, bool_col, 
tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col], limit=10, schema=[id:Int32;N, bool_col:Boolean;N, tinyint_col:Int32;N, smallint_col:Int32;N, int_col:Int32;N, bigint_col:Int64;N, float_col:Float32;N, double_col:Float64;N, date_string_col:BinaryView;N, string_col:BinaryView;N, timestamp_col:Timestamp(Nanosecond, None);N] statement ok @@ -345,8 +345,8 @@ initial_physical_plan_with_stats 01)GlobalLimitExec: skip=0, fetch=10, statistics=[Rows=Exact(8), Bytes=Absent, [(Col[0]:),(Col[1]:),(Col[2]:),(Col[3]:),(Col[4]:),(Col[5]:),(Col[6]:),(Col[7]:),(Col[8]:),(Col[9]:),(Col[10]:)]] 02)--ParquetExec: file_groups={1 group: [[WORKSPACE_ROOT/parquet-testing/data/alltypes_plain.parquet]]}, projection=[id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col], limit=10, statistics=[Rows=Exact(8), Bytes=Absent, [(Col[0]:),(Col[1]:),(Col[2]:),(Col[3]:),(Col[4]:),(Col[5]:),(Col[6]:),(Col[7]:),(Col[8]:),(Col[9]:),(Col[10]:)]] initial_physical_plan_with_schema -01)GlobalLimitExec: skip=0, fetch=10, schema=[id:Int32;N, bool_col:Boolean;N, tinyint_col:Int32;N, smallint_col:Int32;N, int_col:Int32;N, bigint_col:Int64;N, float_col:Float32;N, double_col:Float64;N, date_string_col:Binary;N, string_col:Binary;N, timestamp_col:Timestamp(Nanosecond, None);N] -02)--ParquetExec: file_groups={1 group: [[WORKSPACE_ROOT/parquet-testing/data/alltypes_plain.parquet]]}, projection=[id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col], limit=10, schema=[id:Int32;N, bool_col:Boolean;N, tinyint_col:Int32;N, smallint_col:Int32;N, int_col:Int32;N, bigint_col:Int64;N, float_col:Float32;N, double_col:Float64;N, date_string_col:Binary;N, string_col:Binary;N, timestamp_col:Timestamp(Nanosecond, None);N] +01)GlobalLimitExec: skip=0, fetch=10, schema=[id:Int32;N, bool_col:Boolean;N, 
tinyint_col:Int32;N, smallint_col:Int32;N, int_col:Int32;N, bigint_col:Int64;N, float_col:Float32;N, double_col:Float64;N, date_string_col:BinaryView;N, string_col:BinaryView;N, timestamp_col:Timestamp(Nanosecond, None);N] +02)--ParquetExec: file_groups={1 group: [[WORKSPACE_ROOT/parquet-testing/data/alltypes_plain.parquet]]}, projection=[id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col], limit=10, schema=[id:Int32;N, bool_col:Boolean;N, tinyint_col:Int32;N, smallint_col:Int32;N, int_col:Int32;N, bigint_col:Int64;N, float_col:Float32;N, double_col:Float64;N, date_string_col:BinaryView;N, string_col:BinaryView;N, timestamp_col:Timestamp(Nanosecond, None);N] physical_plan after OutputRequirements 01)OutputRequirementExec 02)--GlobalLimitExec: skip=0, fetch=10 @@ -369,7 +369,7 @@ physical_plan after LimitPushdown ParquetExec: file_groups={1 group: [[WORKSPACE physical_plan after SanityCheckPlan SAME TEXT AS ABOVE physical_plan ParquetExec: file_groups={1 group: [[WORKSPACE_ROOT/parquet-testing/data/alltypes_plain.parquet]]}, projection=[id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col], limit=10 physical_plan_with_stats ParquetExec: file_groups={1 group: [[WORKSPACE_ROOT/parquet-testing/data/alltypes_plain.parquet]]}, projection=[id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col], limit=10, statistics=[Rows=Exact(8), Bytes=Absent, [(Col[0]:),(Col[1]:),(Col[2]:),(Col[3]:),(Col[4]:),(Col[5]:),(Col[6]:),(Col[7]:),(Col[8]:),(Col[9]:),(Col[10]:)]] -physical_plan_with_schema ParquetExec: file_groups={1 group: [[WORKSPACE_ROOT/parquet-testing/data/alltypes_plain.parquet]]}, projection=[id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col], limit=10, schema=[id:Int32;N, 
bool_col:Boolean;N, tinyint_col:Int32;N, smallint_col:Int32;N, int_col:Int32;N, bigint_col:Int64;N, float_col:Float32;N, double_col:Float64;N, date_string_col:Binary;N, string_col:Binary;N, timestamp_col:Timestamp(Nanosecond, None);N] +physical_plan_with_schema ParquetExec: file_groups={1 group: [[WORKSPACE_ROOT/parquet-testing/data/alltypes_plain.parquet]]}, projection=[id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col], limit=10, schema=[id:Int32;N, bool_col:Boolean;N, tinyint_col:Int32;N, smallint_col:Int32;N, int_col:Int32;N, bigint_col:Int64;N, float_col:Float32;N, double_col:Float64;N, date_string_col:BinaryView;N, string_col:BinaryView;N, timestamp_col:Timestamp(Nanosecond, None);N] statement ok diff --git a/datafusion/sqllogictest/test_files/filter_without_sort_exec.slt b/datafusion/sqllogictest/test_files/filter_without_sort_exec.slt index 3b4deae3326f..d96044fda8c0 100644 --- a/datafusion/sqllogictest/test_files/filter_without_sort_exec.slt +++ b/datafusion/sqllogictest/test_files/filter_without_sort_exec.slt @@ -37,7 +37,7 @@ logical_plan 02)--Filter: data.ticker = Utf8("A") 03)----TableScan: data projection=[date, ticker, time] physical_plan -01)SortPreservingMergeExec: [date@0 ASC NULLS LAST,time@2 ASC NULLS LAST] +01)SortPreservingMergeExec: [date@0 ASC NULLS LAST, time@2 ASC NULLS LAST] 02)--CoalesceBatchesExec: target_batch_size=8192 03)----FilterExec: ticker@1 = A 04)------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 @@ -105,7 +105,7 @@ logical_plan 02)--Filter: data.ticker = Utf8("A") AND CAST(data.time AS Date32) = data.date 03)----TableScan: data projection=[date, ticker, time] physical_plan -01)SortPreservingMergeExec: [time@2 ASC NULLS LAST,date@0 ASC NULLS LAST] +01)SortPreservingMergeExec: [time@2 ASC NULLS LAST, date@0 ASC NULLS LAST] 02)--CoalesceBatchesExec: target_batch_size=8192 03)----FilterExec: ticker@1 = A AND CAST(time@2 AS Date32) 
= date@0 04)------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 @@ -146,7 +146,7 @@ logical_plan 02)--Filter: data.date = Date32("2006-01-02") 03)----TableScan: data projection=[date, ticker, time] physical_plan -01)SortPreservingMergeExec: [ticker@1 ASC NULLS LAST,time@2 ASC NULLS LAST] +01)SortPreservingMergeExec: [ticker@1 ASC NULLS LAST, time@2 ASC NULLS LAST] 02)--CoalesceBatchesExec: target_batch_size=8192 03)----FilterExec: date@0 = 2006-01-02 04)------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 diff --git a/datafusion/sqllogictest/test_files/group_by.slt b/datafusion/sqllogictest/test_files/group_by.slt index 61b3ad73cd0a..daf270190870 100644 --- a/datafusion/sqllogictest/test_files/group_by.slt +++ b/datafusion/sqllogictest/test_files/group_by.slt @@ -2250,7 +2250,7 @@ logical_plan 01)Sort: annotated_data_infinite2.a ASC NULLS LAST, annotated_data_infinite2.b ASC NULLS LAST, annotated_data_infinite2.d ASC NULLS LAST 02)--TableScan: annotated_data_infinite2 projection=[a0, a, b, c, d] physical_plan -01)PartialSortExec: expr=[a@1 ASC NULLS LAST,b@2 ASC NULLS LAST,d@4 ASC NULLS LAST], common_prefix_length=[2] +01)PartialSortExec: expr=[a@1 ASC NULLS LAST, b@2 ASC NULLS LAST, d@4 ASC NULLS LAST], common_prefix_length=[2] 02)--StreamingTableExec: partition_sizes=1, projection=[a0, a, b, c, d], infinite_source=true, output_ordering=[a@1 ASC NULLS LAST, b@2 ASC NULLS LAST, c@3 ASC NULLS LAST] query TT @@ -2263,7 +2263,7 @@ logical_plan 01)Sort: annotated_data_infinite2.a ASC NULLS LAST, annotated_data_infinite2.b ASC NULLS LAST, annotated_data_infinite2.d ASC NULLS LAST, fetch=50 02)--TableScan: annotated_data_infinite2 projection=[a0, a, b, c, d] physical_plan -01)PartialSortExec: TopK(fetch=50), expr=[a@1 ASC NULLS LAST,b@2 ASC NULLS LAST,d@4 ASC NULLS LAST], common_prefix_length=[2] +01)PartialSortExec: TopK(fetch=50), expr=[a@1 ASC NULLS LAST, b@2 ASC NULLS LAST, d@4 ASC NULLS LAST], common_prefix_length=[2] 
02)--StreamingTableExec: partition_sizes=1, projection=[a0, a, b, c, d], infinite_source=true, output_ordering=[a@1 ASC NULLS LAST, b@2 ASC NULLS LAST, c@3 ASC NULLS LAST] query TT @@ -2275,7 +2275,7 @@ logical_plan 01)Sort: multiple_ordered_table.a ASC NULLS LAST, multiple_ordered_table.b ASC NULLS LAST, multiple_ordered_table.d ASC NULLS LAST 02)--TableScan: multiple_ordered_table projection=[a0, a, b, c, d] physical_plan -01)SortExec: expr=[a@1 ASC NULLS LAST,b@2 ASC NULLS LAST,d@4 ASC NULLS LAST], preserve_partitioning=[false] +01)SortExec: expr=[a@1 ASC NULLS LAST, b@2 ASC NULLS LAST, d@4 ASC NULLS LAST], preserve_partitioning=[false] 02)--CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a0, a, b, c, d], output_orderings=[[a@1 ASC NULLS LAST, b@2 ASC NULLS LAST], [c@3 ASC NULLS LAST]], has_header=true query TT @@ -2288,7 +2288,7 @@ logical_plan 02)--TableScan: annotated_data_infinite2 projection=[a, b, d] physical_plan 01)AggregateExec: mode=Single, gby=[a@0 as a, b@1 as b], aggr=[array_agg(annotated_data_infinite2.d) ORDER BY [annotated_data_infinite2.d ASC NULLS LAST]], ordering_mode=Sorted -02)--PartialSortExec: expr=[a@0 ASC NULLS LAST,b@1 ASC NULLS LAST,d@2 ASC NULLS LAST], common_prefix_length=[2] +02)--PartialSortExec: expr=[a@0 ASC NULLS LAST, b@1 ASC NULLS LAST, d@2 ASC NULLS LAST], common_prefix_length=[2] 03)----StreamingTableExec: partition_sizes=1, projection=[a, b, d], infinite_source=true, output_ordering=[a@0 ASC NULLS LAST, b@1 ASC NULLS LAST] # as can be seen in the result below d is indeed ordered. 
@@ -2535,7 +2535,7 @@ logical_plan physical_plan 01)ProjectionExec: expr=[country@0 as country, array_agg(s.amount) ORDER BY [s.amount DESC NULLS FIRST]@1 as amounts, sum(s.amount)@2 as sum1] 02)--AggregateExec: mode=Single, gby=[country@0 as country], aggr=[array_agg(s.amount) ORDER BY [s.amount DESC NULLS FIRST], sum(s.amount)], ordering_mode=Sorted -03)----SortExec: TopK(fetch=10), expr=[country@0 ASC NULLS LAST,amount@1 DESC], preserve_partitioning=[false] +03)----SortExec: TopK(fetch=10), expr=[country@0 ASC NULLS LAST, amount@1 DESC], preserve_partitioning=[false] 04)------MemoryExec: partitions=1, partition_sizes=[1] @@ -2573,7 +2573,7 @@ logical_plan physical_plan 01)ProjectionExec: expr=[country@0 as country, zip_code@1 as zip_code, array_agg(s.amount) ORDER BY [s.amount DESC NULLS FIRST]@2 as amounts, sum(s.amount)@3 as sum1] 02)--AggregateExec: mode=Single, gby=[country@1 as country, zip_code@0 as zip_code], aggr=[array_agg(s.amount) ORDER BY [s.amount DESC NULLS FIRST], sum(s.amount)], ordering_mode=PartiallySorted([0]) -03)----SortExec: TopK(fetch=10), expr=[country@1 ASC NULLS LAST,amount@2 DESC], preserve_partitioning=[false] +03)----SortExec: TopK(fetch=10), expr=[country@1 ASC NULLS LAST, amount@2 DESC], preserve_partitioning=[false] 04)------MemoryExec: partitions=1, partition_sizes=[1] query TI?R rowsort @@ -2646,7 +2646,7 @@ logical_plan physical_plan 01)ProjectionExec: expr=[country@0 as country, array_agg(s.amount) ORDER BY [s.country DESC NULLS FIRST, s.amount DESC NULLS FIRST]@1 as amounts, sum(s.amount)@2 as sum1] 02)--AggregateExec: mode=Single, gby=[country@0 as country], aggr=[array_agg(s.amount) ORDER BY [s.country DESC NULLS FIRST, s.amount DESC NULLS FIRST], sum(s.amount)], ordering_mode=Sorted -03)----SortExec: TopK(fetch=10), expr=[country@0 ASC NULLS LAST,amount@1 DESC], preserve_partitioning=[false] +03)----SortExec: TopK(fetch=10), expr=[country@0 ASC NULLS LAST, amount@1 DESC], preserve_partitioning=[false] 04)------MemoryExec: 
partitions=1, partition_sizes=[1] @@ -4328,7 +4328,7 @@ logical_plan 02)--Projection: unbounded_csv_with_timestamps2.name, date_bin(IntervalMonthDayNano("IntervalMonthDayNano { months: 0, days: 0, nanoseconds: 900000000000 }"), unbounded_csv_with_timestamps2.ts) AS time_chunks 03)----TableScan: unbounded_csv_with_timestamps2 projection=[name, ts] physical_plan -01)SortPreservingMergeExec: [name@0 DESC,time_chunks@1 DESC], fetch=5 +01)SortPreservingMergeExec: [name@0 DESC, time_chunks@1 DESC], fetch=5 02)--ProjectionExec: expr=[name@0 as name, date_bin(IntervalMonthDayNano { months: 0, days: 0, nanoseconds: 900000000000 }, ts@1) as time_chunks] 03)----RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1 04)------StreamingTableExec: partition_sizes=1, projection=[name, ts], infinite_source=true, output_ordering=[name@0 DESC, ts@1 DESC] diff --git a/datafusion/sqllogictest/test_files/information_schema.slt b/datafusion/sqllogictest/test_files/information_schema.slt index 3630f6c36595..84d18233d572 100644 --- a/datafusion/sqllogictest/test_files/information_schema.slt +++ b/datafusion/sqllogictest/test_files/information_schema.slt @@ -202,7 +202,7 @@ datafusion.execution.parquet.metadata_size_hint NULL datafusion.execution.parquet.pruning true datafusion.execution.parquet.pushdown_filters false datafusion.execution.parquet.reorder_filters false -datafusion.execution.parquet.schema_force_view_types false +datafusion.execution.parquet.schema_force_view_types true datafusion.execution.parquet.skip_metadata true datafusion.execution.parquet.statistics_enabled page datafusion.execution.parquet.write_batch_size 1024 @@ -210,6 +210,7 @@ datafusion.execution.parquet.writer_version 1.0 datafusion.execution.planning_concurrency 13 datafusion.execution.skip_partial_aggregation_probe_ratio_threshold 0.8 datafusion.execution.skip_partial_aggregation_probe_rows_threshold 100000 +datafusion.execution.skip_physical_aggregate_schema_check false 
datafusion.execution.soft_max_rows_per_output_file 50000000 datafusion.execution.sort_in_place_threshold_bytes 1048576 datafusion.execution.sort_spill_reservation_bytes 10485760 @@ -294,7 +295,7 @@ datafusion.execution.parquet.metadata_size_hint NULL (reading) If specified, the datafusion.execution.parquet.pruning true (reading) If true, the parquet reader attempts to skip entire row groups based on the predicate in the query and the metadata (min/max values) stored in the parquet file datafusion.execution.parquet.pushdown_filters false (reading) If true, filter expressions are be applied during the parquet decoding operation to reduce the number of rows decoded. This optimization is sometimes called "late materialization". datafusion.execution.parquet.reorder_filters false (reading) If true, filter expressions evaluated during the parquet decoding operation will be reordered heuristically to minimize the cost of evaluation. If false, the filters are applied in the same order as written in the query -datafusion.execution.parquet.schema_force_view_types false (reading) If true, parquet reader will read columns of `Utf8/Utf8Large` with `Utf8View`, and `Binary/BinaryLarge` with `BinaryView`. +datafusion.execution.parquet.schema_force_view_types true (reading) If true, parquet reader will read columns of `Utf8/Utf8Large` with `Utf8View`, and `Binary/BinaryLarge` with `BinaryView`. datafusion.execution.parquet.skip_metadata true (reading) If true, the parquet reader skip the optional embedded metadata that may be in the file Schema. This setting can help avoid schema conflicts when querying multiple parquet files with schemas containing compatible types but different metadata datafusion.execution.parquet.statistics_enabled page (writing) Sets if statistics are enabled for any column Valid values are: "none", "chunk", and "page" These values are not case sensitive. 
If NULL, uses default parquet writer setting datafusion.execution.parquet.write_batch_size 1024 (writing) Sets write_batch_size in bytes @@ -302,6 +303,7 @@ datafusion.execution.parquet.writer_version 1.0 (writing) Sets parquet writer ve datafusion.execution.planning_concurrency 13 Fan-out during initial physical planning. This is mostly use to plan `UNION` children in parallel. Defaults to the number of CPU cores on the system datafusion.execution.skip_partial_aggregation_probe_ratio_threshold 0.8 Aggregation ratio (number of distinct groups / number of input rows) threshold for skipping partial aggregation. If the value is greater then partial aggregation will skip aggregation for further input datafusion.execution.skip_partial_aggregation_probe_rows_threshold 100000 Number of input rows partial aggregation partition should process, before aggregation ratio check and trying to switch to skipping aggregation mode +datafusion.execution.skip_physical_aggregate_schema_check false When set to true, skips verifying that the schema produced by planning the input of `LogicalPlan::Aggregate` exactly matches the schema of the input plan. When set to false, if the schema does not match exactly (including nullability and metadata), a planning error will be raised. This is used to workaround bugs in the planner that are now caught by the new schema verification step. datafusion.execution.soft_max_rows_per_output_file 50000000 Target number of rows in output files when writing multiple. This is a soft max, so it can be exceeded slightly. There also will be one file smaller than the limit if the total number of rows written is not roughly divisible by the soft max datafusion.execution.sort_in_place_threshold_bytes 1048576 When sorting, below what size should data be concatenated and sorted in a single RecordBatch rather than sorted in batches and merged. 
datafusion.execution.sort_spill_reservation_bytes 10485760 Specifies the reserved memory for each spillable sort operation to facilitate an in-memory merge. When a sort operation spills to disk, the in-memory data must be sorted and merged before being written to a file. This setting reserves a specific amount of memory for that in-memory sort/merge process. Note: This setting is irrelevant if the sort operation cannot spill (i.e., if there's no `DiskManager` configured). diff --git a/datafusion/sqllogictest/test_files/insert.slt b/datafusion/sqllogictest/test_files/insert.slt index 230ea4d98fc3..804612287246 100644 --- a/datafusion/sqllogictest/test_files/insert.slt +++ b/datafusion/sqllogictest/test_files/insert.slt @@ -69,7 +69,7 @@ physical_plan 03)----SortPreservingMergeExec: [c1@2 ASC NULLS LAST] 04)------ProjectionExec: expr=[sum(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING@3 as sum(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING, count(*) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING@4 as count(*) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING, c1@0 as c1] 05)--------BoundedWindowAggExec: wdw=[sum(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "sum(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(1)), 
is_causal: false }, count(*) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "count(*) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(1)), is_causal: false }], mode=[Sorted] -06)----------SortExec: expr=[c1@0 ASC NULLS LAST,c9@2 ASC NULLS LAST], preserve_partitioning=[true] +06)----------SortExec: expr=[c1@0 ASC NULLS LAST, c9@2 ASC NULLS LAST], preserve_partitioning=[true] 07)------------CoalesceBatchesExec: target_batch_size=8192 08)--------------RepartitionExec: partitioning=Hash([c1@0], 8), input_partitions=8 09)----------------RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1 @@ -129,7 +129,7 @@ physical_plan 02)--CoalescePartitionsExec 03)----ProjectionExec: expr=[sum(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING@3 as field1, count(*) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING@4 as field2] 04)------BoundedWindowAggExec: wdw=[sum(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "sum(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(1)), is_causal: false }, count(*) PARTITION BY [aggregate_test_100.c1] ORDER 
BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "count(*) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(1)), is_causal: false }], mode=[Sorted] -05)--------SortExec: expr=[c1@0 ASC NULLS LAST,c9@2 ASC NULLS LAST], preserve_partitioning=[true] +05)--------SortExec: expr=[c1@0 ASC NULLS LAST, c9@2 ASC NULLS LAST], preserve_partitioning=[true] 06)----------CoalesceBatchesExec: target_batch_size=8192 07)------------RepartitionExec: partitioning=Hash([c1@0], 8), input_partitions=8 08)--------------RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1 @@ -180,7 +180,7 @@ physical_plan 03)----SortPreservingMergeExec: [c1@2 ASC NULLS LAST] 04)------ProjectionExec: expr=[sum(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING@3 as a1, count(*) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING@4 as a2, c1@0 as c1] 05)--------BoundedWindowAggExec: wdw=[sum(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "sum(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(1)), is_causal: false }, count(*) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS 
BETWEEN 1 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "count(*) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(1)), is_causal: false }], mode=[Sorted] -06)----------SortExec: expr=[c1@0 ASC NULLS LAST,c9@2 ASC NULLS LAST], preserve_partitioning=[true] +06)----------SortExec: expr=[c1@0 ASC NULLS LAST, c9@2 ASC NULLS LAST], preserve_partitioning=[true] 07)------------CoalesceBatchesExec: target_batch_size=8192 08)--------------RepartitionExec: partitioning=Hash([c1@0], 8), input_partitions=8 09)----------------RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1 diff --git a/datafusion/sqllogictest/test_files/insert_to_external.slt b/datafusion/sqllogictest/test_files/insert_to_external.slt index c40f62c3ba80..35decd728eed 100644 --- a/datafusion/sqllogictest/test_files/insert_to_external.slt +++ b/datafusion/sqllogictest/test_files/insert_to_external.slt @@ -126,7 +126,7 @@ logical_plan 03)----Values: (Int64(5), Int64(1)), (Int64(4), Int64(2)), (Int64(7), Int64(7)), (Int64(7), Int64(8)), (Int64(7), Int64(9))... 
physical_plan 01)DataSinkExec: sink=CsvSink(file_groups=[]) -02)--SortExec: expr=[a@0 ASC NULLS LAST,b@1 DESC], preserve_partitioning=[false] +02)--SortExec: expr=[a@0 ASC NULLS LAST, b@1 DESC], preserve_partitioning=[false] 03)----ProjectionExec: expr=[column1@0 as a, column2@1 as b] 04)------ValuesExec @@ -358,7 +358,7 @@ physical_plan 03)----SortPreservingMergeExec: [c1@2 ASC NULLS LAST] 04)------ProjectionExec: expr=[sum(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING@3 as sum(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING, count(*) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING@4 as count(*) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING, c1@0 as c1] 05)--------BoundedWindowAggExec: wdw=[sum(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "sum(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(1)), is_causal: false }, count(*) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "count(*) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { 
units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(1)), is_causal: false }], mode=[Sorted] -06)----------SortExec: expr=[c1@0 ASC NULLS LAST,c9@2 ASC NULLS LAST], preserve_partitioning=[true] +06)----------SortExec: expr=[c1@0 ASC NULLS LAST, c9@2 ASC NULLS LAST], preserve_partitioning=[true] 07)------------CoalesceBatchesExec: target_batch_size=8192 08)--------------RepartitionExec: partitioning=Hash([c1@0], 8), input_partitions=8 09)----------------RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1 @@ -419,7 +419,7 @@ physical_plan 02)--CoalescePartitionsExec 03)----ProjectionExec: expr=[sum(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING@3 as field1, count(*) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING@4 as field2] 04)------BoundedWindowAggExec: wdw=[sum(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "sum(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(1)), is_causal: false }, count(*) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "count(*) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: 
Following(UInt64(1)), is_causal: false }], mode=[Sorted] -05)--------SortExec: expr=[c1@0 ASC NULLS LAST,c9@2 ASC NULLS LAST], preserve_partitioning=[true] +05)--------SortExec: expr=[c1@0 ASC NULLS LAST, c9@2 ASC NULLS LAST], preserve_partitioning=[true] 06)----------CoalesceBatchesExec: target_batch_size=8192 07)------------RepartitionExec: partitioning=Hash([c1@0], 8), input_partitions=8 08)--------------RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1 diff --git a/datafusion/sqllogictest/test_files/join_disable_repartition_joins.slt b/datafusion/sqllogictest/test_files/join_disable_repartition_joins.slt index c56c59b1bd78..cf897d628da5 100644 --- a/datafusion/sqllogictest/test_files/join_disable_repartition_joins.slt +++ b/datafusion/sqllogictest/test_files/join_disable_repartition_joins.slt @@ -94,7 +94,7 @@ logical_plan 07)--------Filter: annotated_data.d = Int32(3) 08)----------TableScan: annotated_data projection=[a, b, c, d], partial_filters=[annotated_data.d = Int32(3)] physical_plan -01)SortPreservingMergeExec: [a2@0 ASC NULLS LAST,b@1 ASC NULLS LAST], fetch=10 +01)SortPreservingMergeExec: [a2@0 ASC NULLS LAST, b@1 ASC NULLS LAST], fetch=10 02)--ProjectionExec: expr=[a@0 as a2, b@1 as b] 03)----CoalesceBatchesExec: target_batch_size=8192, fetch=10 04)------HashJoinExec: mode=CollectLeft, join_type=RightSemi, on=[(d@1, d@3), (c@0, c@2)], projection=[a@0, b@1] diff --git a/datafusion/sqllogictest/test_files/joins.slt b/datafusion/sqllogictest/test_files/joins.slt index bc40f845cc8a..93bb1f1f548e 100644 --- a/datafusion/sqllogictest/test_files/joins.slt +++ b/datafusion/sqllogictest/test_files/joins.slt @@ -3243,13 +3243,13 @@ physical_plan 01)SortPreservingMergeExec: [rn1@5 ASC NULLS LAST] 02)--SortMergeJoin: join_type=Inner, on=[(a@1, a@1)] 03)----CoalesceBatchesExec: target_batch_size=2 -04)------RepartitionExec: partitioning=Hash([a@1], 2), input_partitions=2, preserve_order=true, sort_exprs=a@1 ASC,b@2 ASC NULLS LAST,c@3 ASC NULLS 
LAST,rn1@5 ASC NULLS LAST +04)------RepartitionExec: partitioning=Hash([a@1], 2), input_partitions=2, preserve_order=true, sort_exprs=a@1 ASC, b@2 ASC NULLS LAST, c@3 ASC NULLS LAST, rn1@5 ASC NULLS LAST 05)--------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1 06)----------ProjectionExec: expr=[a0@0 as a0, a@1 as a, b@2 as b, c@3 as c, d@4 as d, row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING@5 as rn1] 07)------------BoundedWindowAggExec: wdw=[row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }], mode=[Sorted] 08)--------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a0, a, b, c, d], output_ordering=[a@1 ASC, b@2 ASC NULLS LAST, c@3 ASC NULLS LAST], has_header=true 09)----CoalesceBatchesExec: target_batch_size=2 -10)------RepartitionExec: partitioning=Hash([a@1], 2), input_partitions=2, preserve_order=true, sort_exprs=a@1 ASC,b@2 ASC NULLS LAST,c@3 ASC NULLS LAST +10)------RepartitionExec: partitioning=Hash([a@1], 2), input_partitions=2, preserve_order=true, sort_exprs=a@1 ASC, b@2 ASC NULLS LAST, c@3 ASC NULLS LAST 11)--------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1 12)----------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a0, a, b, c, d], output_ordering=[a@1 ASC, b@2 ASC NULLS LAST, c@3 ASC NULLS LAST], has_header=true @@ -3277,11 +3277,11 @@ physical_plan 01)SortPreservingMergeExec: [rn1@10 ASC NULLS LAST] 02)--SortMergeJoin: join_type=Right, on=[(a@1, a@1)] 03)----CoalesceBatchesExec: target_batch_size=2 -04)------RepartitionExec: 
partitioning=Hash([a@1], 2), input_partitions=2, preserve_order=true, sort_exprs=a@1 ASC,b@2 ASC NULLS LAST,c@3 ASC NULLS LAST +04)------RepartitionExec: partitioning=Hash([a@1], 2), input_partitions=2, preserve_order=true, sort_exprs=a@1 ASC, b@2 ASC NULLS LAST, c@3 ASC NULLS LAST 05)--------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1 06)----------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a0, a, b, c, d], output_ordering=[a@1 ASC, b@2 ASC NULLS LAST, c@3 ASC NULLS LAST], has_header=true 07)----CoalesceBatchesExec: target_batch_size=2 -08)------RepartitionExec: partitioning=Hash([a@1], 2), input_partitions=2, preserve_order=true, sort_exprs=a@1 ASC,b@2 ASC NULLS LAST,c@3 ASC NULLS LAST,rn1@5 ASC NULLS LAST +08)------RepartitionExec: partitioning=Hash([a@1], 2), input_partitions=2, preserve_order=true, sort_exprs=a@1 ASC, b@2 ASC NULLS LAST, c@3 ASC NULLS LAST, rn1@5 ASC NULLS LAST 09)--------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1 10)----------ProjectionExec: expr=[a0@0 as a0, a@1 as a, b@2 as b, c@3 as c, d@4 as d, row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING@5 as rn1] 11)------------BoundedWindowAggExec: wdw=[row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }], mode=[Sorted] @@ -3315,8 +3315,8 @@ logical_plan 09)--------WindowAggr: windowExpr=[[row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING]] 10)----------TableScan: annotated_data projection=[a0, a, b, c, d] physical_plan -01)SortPreservingMergeExec: [a@1 ASC,b@2 ASC NULLS LAST,c@3 ASC NULLS LAST,rn1@11 ASC NULLS LAST] 
-02)--SortExec: expr=[a@1 ASC,b@2 ASC NULLS LAST,c@3 ASC NULLS LAST,rn1@11 ASC NULLS LAST], preserve_partitioning=[true] +01)SortPreservingMergeExec: [a@1 ASC, b@2 ASC NULLS LAST, c@3 ASC NULLS LAST, rn1@11 ASC NULLS LAST] +02)--SortExec: expr=[a@1 ASC, b@2 ASC NULLS LAST, c@3 ASC NULLS LAST, rn1@11 ASC NULLS LAST], preserve_partitioning=[true] 03)----SortMergeJoin: join_type=Inner, on=[(a@1, a@1)] 04)------SortExec: expr=[a@1 ASC], preserve_partitioning=[true] 05)--------CoalesceBatchesExec: target_batch_size=2 diff --git a/datafusion/sqllogictest/test_files/map.slt b/datafusion/sqllogictest/test_files/map.slt index 726de75b5141..10ca3ae881bf 100644 --- a/datafusion/sqllogictest/test_files/map.slt +++ b/datafusion/sqllogictest/test_files/map.slt @@ -42,7 +42,7 @@ describe data; ---- ints Map(Field { name: "entries", data_type: Struct([Field { name: "key", data_type: Utf8, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "value", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }]), nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, false) NO strings Map(Field { name: "entries", data_type: Struct([Field { name: "key", data_type: Utf8, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "value", data_type: Utf8, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }]), nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, false) NO -timestamp Utf8 NO +timestamp Utf8View NO query ??T SELECT * FROM data ORDER by ints['bytes'] DESC LIMIT 10; @@ -433,7 +433,7 @@ SELECT MAP {'a': 1, null: 2} query ? SELECT MAP {[1,2,3]:1, [2,4]:2}; ---- - {[1, 2, 3]: 1, [2, 4]: 2} +{[1, 2, 3]: 1, [2, 4]: 2} # array with different type as key # expect to fail due to type coercion error @@ -483,7 +483,7 @@ SELECT MAP { MAP {1:'a', 2:'b'}:1, MAP {1:'c', 2:'d'}:2 }; query ? 
SELECT MAP { MAP {1:'a', 2:'b', 3:'c'}:1, MAP {2:'c', 4:'d'}:2 }; ---- - {{1: a, 2: b, 3: c}: 1, {2: c, 4: d}: 2} +{{1: a, 2: b, 3: c}: 1, {2: c, 4: d}: 2} # map as value query ? diff --git a/datafusion/sqllogictest/test_files/monotonic_projection_test.slt b/datafusion/sqllogictest/test_files/monotonic_projection_test.slt index d41b78dcd3f2..abf48fac5364 100644 --- a/datafusion/sqllogictest/test_files/monotonic_projection_test.slt +++ b/datafusion/sqllogictest/test_files/monotonic_projection_test.slt @@ -44,7 +44,7 @@ logical_plan 02)--Projection: CAST(multiple_ordered_table.a AS Int64) AS a_big, multiple_ordered_table.b 03)----TableScan: multiple_ordered_table projection=[a, b] physical_plan -01)SortPreservingMergeExec: [a_big@0 ASC NULLS LAST,b@1 ASC NULLS LAST] +01)SortPreservingMergeExec: [a_big@0 ASC NULLS LAST, b@1 ASC NULLS LAST] 02)--ProjectionExec: expr=[CAST(a@0 AS Int64) as a_big, b@1 as b] 03)----RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 04)------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a, b], output_ordering=[a@0 ASC NULLS LAST, b@1 ASC NULLS LAST], has_header=true @@ -60,7 +60,7 @@ logical_plan 02)--Projection: multiple_ordered_table.a, CAST(multiple_ordered_table.a AS Int64) AS a_big, multiple_ordered_table.b 03)----TableScan: multiple_ordered_table projection=[a, b] physical_plan -01)SortPreservingMergeExec: [a@0 ASC NULLS LAST,b@2 ASC NULLS LAST] +01)SortPreservingMergeExec: [a@0 ASC NULLS LAST, b@2 ASC NULLS LAST] 02)--ProjectionExec: expr=[a@0 as a, CAST(a@0 AS Int64) as a_big, b@1 as b] 03)----RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 04)------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a, b], output_ordering=[a@0 ASC NULLS LAST, b@1 ASC NULLS LAST], has_header=true @@ -81,7 +81,7 @@ logical_plan 02)--Projection: multiple_ordered_table.a, CAST(multiple_ordered_table.a AS 
Int64) AS a_big, multiple_ordered_table.b 03)----TableScan: multiple_ordered_table projection=[a, b] physical_plan -01)SortPreservingMergeExec: [a_big@1 ASC NULLS LAST,b@2 ASC NULLS LAST] +01)SortPreservingMergeExec: [a_big@1 ASC NULLS LAST, b@2 ASC NULLS LAST] 02)--ProjectionExec: expr=[a@0 as a, CAST(a@0 AS Int64) as a_big, b@1 as b] 03)----RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 04)------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a, b], output_ordering=[a@0 ASC NULLS LAST, b@1 ASC NULLS LAST], has_header=true @@ -132,8 +132,8 @@ logical_plan 02)--Projection: CAST(multiple_ordered_table.a AS Utf8) AS a_str, multiple_ordered_table.b 03)----TableScan: multiple_ordered_table projection=[a, b] physical_plan -01)SortPreservingMergeExec: [a_str@0 ASC NULLS LAST,b@1 ASC NULLS LAST] -02)--SortExec: expr=[a_str@0 ASC NULLS LAST,b@1 ASC NULLS LAST], preserve_partitioning=[true] +01)SortPreservingMergeExec: [a_str@0 ASC NULLS LAST, b@1 ASC NULLS LAST] +02)--SortExec: expr=[a_str@0 ASC NULLS LAST, b@1 ASC NULLS LAST], preserve_partitioning=[true] 03)----ProjectionExec: expr=[CAST(a@0 AS Utf8) as a_str, b@1 as b] 04)------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 05)--------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a, b], output_ordering=[a@0 ASC NULLS LAST, b@1 ASC NULLS LAST], has_header=true diff --git a/datafusion/sqllogictest/test_files/order.slt b/datafusion/sqllogictest/test_files/order.slt index 6cc7ee0403f2..d5f0521407c5 100644 --- a/datafusion/sqllogictest/test_files/order.slt +++ b/datafusion/sqllogictest/test_files/order.slt @@ -165,7 +165,7 @@ logical_plan 03)----TableScan: aggregate_test_100 projection=[c1, c2, c3] physical_plan 01)ProjectionExec: expr=[c1@0 as c1, c2@1 as c2] -02)--SortExec: expr=[c2@1 ASC NULLS LAST,c3@2 ASC NULLS LAST], preserve_partitioning=[false] +02)--SortExec: 
expr=[c2@1 ASC NULLS LAST, c3@2 ASC NULLS LAST], preserve_partitioning=[false] 03)----CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c1, c2, c3], has_header=true query II @@ -335,6 +335,14 @@ select column1 from foo order by log(column2); 3 5 +# Test issue: https://github.com/apache/datafusion/issues/13157 +query I +select column1 from foo order by column2 % 2, column2; +---- +1 +3 +5 + # Cleanup statement ok drop table foo; @@ -683,7 +691,7 @@ logical_plan 01)Sort: t1.id DESC NULLS FIRST, t1.name ASC NULLS LAST 02)--TableScan: t1 projection=[id, name] physical_plan -01)SortExec: expr=[id@0 DESC,name@1 ASC NULLS LAST], preserve_partitioning=[false] +01)SortExec: expr=[id@0 DESC, name@1 ASC NULLS LAST], preserve_partitioning=[false] 02)--MemoryExec: partitions=1, partition_sizes=[1] query IT @@ -702,7 +710,7 @@ logical_plan 01)Sort: t1.id ASC NULLS LAST, t1.name ASC NULLS LAST 02)--TableScan: t1 projection=[id, name] physical_plan -01)SortExec: expr=[id@0 ASC NULLS LAST,name@1 ASC NULLS LAST], preserve_partitioning=[false] +01)SortExec: expr=[id@0 ASC NULLS LAST, name@1 ASC NULLS LAST], preserve_partitioning=[false] 02)--MemoryExec: partitions=1, partition_sizes=[1] @@ -768,8 +776,8 @@ logical_plan 13)------------Projection: column1 AS t 14)--------------Values: (Int64(0)), (Int64(1)) physical_plan -01)SortPreservingMergeExec: [m@0 ASC NULLS LAST,t@1 ASC NULLS LAST] -02)--SortExec: expr=[m@0 ASC NULLS LAST,t@1 ASC NULLS LAST], preserve_partitioning=[true] +01)SortPreservingMergeExec: [m@0 ASC NULLS LAST, t@1 ASC NULLS LAST] +02)--SortExec: expr=[m@0 ASC NULLS LAST, t@1 ASC NULLS LAST], preserve_partitioning=[true] 03)----InterleaveExec 04)------ProjectionExec: expr=[0 as m, t@0 as t] 05)--------AggregateExec: mode=FinalPartitioned, gby=[t@0 as t], aggr=[] @@ -1229,12 +1237,12 @@ logical_plan 09)----------TableScan: ordered_table projection=[a0, b, c, d] physical_plan 01)ProjectionExec: expr=[b@0 as b, c@1 as 
c, a@2 as a, a0@3 as a0] -02)--SortPreservingMergeExec: [d@4 ASC NULLS LAST,c@1 ASC NULLS LAST,a@2 ASC NULLS LAST,a0@3 ASC NULLS LAST,b@0 ASC NULLS LAST], fetch=2 +02)--SortPreservingMergeExec: [d@4 ASC NULLS LAST, c@1 ASC NULLS LAST, a@2 ASC NULLS LAST, a0@3 ASC NULLS LAST, b@0 ASC NULLS LAST], fetch=2 03)----UnionExec -04)------SortExec: TopK(fetch=2), expr=[d@4 ASC NULLS LAST,c@1 ASC NULLS LAST,a@2 ASC NULLS LAST,b@0 ASC NULLS LAST], preserve_partitioning=[false] +04)------SortExec: TopK(fetch=2), expr=[d@4 ASC NULLS LAST, c@1 ASC NULLS LAST, a@2 ASC NULLS LAST, b@0 ASC NULLS LAST], preserve_partitioning=[false] 05)--------ProjectionExec: expr=[b@1 as b, c@2 as c, a@0 as a, NULL as a0, d@3 as d] 06)----------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a, b, c, d], output_ordering=[c@2 ASC NULLS LAST], has_header=true -07)------SortExec: TopK(fetch=2), expr=[d@4 ASC NULLS LAST,c@1 ASC NULLS LAST,a0@3 ASC NULLS LAST,b@0 ASC NULLS LAST], preserve_partitioning=[false] +07)------SortExec: TopK(fetch=2), expr=[d@4 ASC NULLS LAST, c@1 ASC NULLS LAST, a0@3 ASC NULLS LAST, b@0 ASC NULLS LAST], preserve_partitioning=[false] 08)--------ProjectionExec: expr=[b@1 as b, c@2 as c, NULL as a, a0@0 as a0, d@3 as d] 09)----------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a0, b, c, d], output_ordering=[c@2 ASC NULLS LAST], has_header=true diff --git a/datafusion/sqllogictest/test_files/parquet.slt b/datafusion/sqllogictest/test_files/parquet.slt index bf68a1851137..253ebb9ea0ac 100644 --- a/datafusion/sqllogictest/test_files/parquet.slt +++ b/datafusion/sqllogictest/test_files/parquet.slt @@ -89,8 +89,8 @@ logical_plan 01)Sort: test_table.string_col ASC NULLS LAST, test_table.int_col ASC NULLS LAST 02)--TableScan: test_table projection=[int_col, string_col] physical_plan -01)SortPreservingMergeExec: [string_col@1 ASC NULLS LAST,int_col@0 ASC NULLS LAST] 
-02)--SortExec: expr=[string_col@1 ASC NULLS LAST,int_col@0 ASC NULLS LAST], preserve_partitioning=[true] +01)SortPreservingMergeExec: [string_col@1 ASC NULLS LAST, int_col@0 ASC NULLS LAST] +02)--SortExec: expr=[string_col@1 ASC NULLS LAST, int_col@0 ASC NULLS LAST], preserve_partitioning=[true] 03)----ParquetExec: file_groups={2 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet/test_table/0.parquet], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet/test_table/1.parquet]]}, projection=[int_col, string_col] # Tear down test_table: @@ -119,7 +119,7 @@ logical_plan 01)Sort: test_table.string_col ASC NULLS LAST, test_table.int_col ASC NULLS LAST 02)--TableScan: test_table projection=[int_col, string_col] physical_plan -01)SortPreservingMergeExec: [string_col@1 ASC NULLS LAST,int_col@0 ASC NULLS LAST] +01)SortPreservingMergeExec: [string_col@1 ASC NULLS LAST, int_col@0 ASC NULLS LAST] 02)--ParquetExec: file_groups={2 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet/test_table/0.parquet], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet/test_table/1.parquet]]}, projection=[int_col, string_col], output_ordering=[string_col@1 ASC NULLS LAST, int_col@0 ASC NULLS LAST] # Add another file to the directory underlying test_table @@ -141,8 +141,8 @@ logical_plan 01)Sort: test_table.string_col ASC NULLS LAST, test_table.int_col ASC NULLS LAST 02)--TableScan: test_table projection=[int_col, string_col] physical_plan -01)SortPreservingMergeExec: [string_col@1 ASC NULLS LAST,int_col@0 ASC NULLS LAST] -02)--SortExec: expr=[string_col@1 ASC NULLS LAST,int_col@0 ASC NULLS LAST], preserve_partitioning=[true] +01)SortPreservingMergeExec: [string_col@1 ASC NULLS LAST, int_col@0 ASC NULLS LAST] +02)--SortExec: expr=[string_col@1 ASC NULLS LAST, int_col@0 ASC NULLS LAST], preserve_partitioning=[true] 03)----ParquetExec: file_groups={2 groups: 
[[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet/test_table/0.parquet, WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet/test_table/1.parquet], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet/test_table/2.parquet]]}, projection=[int_col, string_col] @@ -384,15 +384,15 @@ select arrow_typeof(binaryview_col), binaryview_col FROM binary_as_string_default; ---- -Binary 616161 Binary 616161 Binary 616161 -Binary 626262 Binary 626262 Binary 626262 -Binary 636363 Binary 636363 Binary 636363 -Binary 646464 Binary 646464 Binary 646464 -Binary 656565 Binary 656565 Binary 656565 -Binary 666666 Binary 666666 Binary 666666 -Binary 676767 Binary 676767 Binary 676767 -Binary 686868 Binary 686868 Binary 686868 -Binary 696969 Binary 696969 Binary 696969 +BinaryView 616161 BinaryView 616161 BinaryView 616161 +BinaryView 626262 BinaryView 626262 BinaryView 626262 +BinaryView 636363 BinaryView 636363 BinaryView 636363 +BinaryView 646464 BinaryView 646464 BinaryView 646464 +BinaryView 656565 BinaryView 656565 BinaryView 656565 +BinaryView 666666 BinaryView 666666 BinaryView 666666 +BinaryView 676767 BinaryView 676767 BinaryView 676767 +BinaryView 686868 BinaryView 686868 BinaryView 686868 +BinaryView 696969 BinaryView 696969 BinaryView 696969 # Run an explain plan to show the cast happens in the plan (a CAST is needed for the predicates) query TT @@ -405,13 +405,13 @@ EXPLAIN binaryview_col LIKE '%a%'; ---- logical_plan -01)Filter: CAST(binary_as_string_default.binary_col AS Utf8) LIKE Utf8("%a%") AND CAST(binary_as_string_default.largebinary_col AS Utf8) LIKE Utf8("%a%") AND CAST(binary_as_string_default.binaryview_col AS Utf8) LIKE Utf8("%a%") -02)--TableScan: binary_as_string_default projection=[binary_col, largebinary_col, binaryview_col], partial_filters=[CAST(binary_as_string_default.binary_col AS Utf8) LIKE Utf8("%a%"), CAST(binary_as_string_default.largebinary_col AS Utf8) LIKE Utf8("%a%"), 
CAST(binary_as_string_default.binaryview_col AS Utf8) LIKE Utf8("%a%")] +01)Filter: CAST(binary_as_string_default.binary_col AS Utf8View) LIKE Utf8View("%a%") AND CAST(binary_as_string_default.largebinary_col AS Utf8View) LIKE Utf8View("%a%") AND CAST(binary_as_string_default.binaryview_col AS Utf8View) LIKE Utf8View("%a%") +02)--TableScan: binary_as_string_default projection=[binary_col, largebinary_col, binaryview_col], partial_filters=[CAST(binary_as_string_default.binary_col AS Utf8View) LIKE Utf8View("%a%"), CAST(binary_as_string_default.largebinary_col AS Utf8View) LIKE Utf8View("%a%"), CAST(binary_as_string_default.binaryview_col AS Utf8View) LIKE Utf8View("%a%")] physical_plan 01)CoalesceBatchesExec: target_batch_size=8192 -02)--FilterExec: CAST(binary_col@0 AS Utf8) LIKE %a% AND CAST(largebinary_col@1 AS Utf8) LIKE %a% AND CAST(binaryview_col@2 AS Utf8) LIKE %a% +02)--FilterExec: CAST(binary_col@0 AS Utf8View) LIKE %a% AND CAST(largebinary_col@1 AS Utf8View) LIKE %a% AND CAST(binaryview_col@2 AS Utf8View) LIKE %a% 03)----RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1 -04)------ParquetExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet/binary_as_string.parquet]]}, projection=[binary_col, largebinary_col, binaryview_col], predicate=CAST(binary_col@0 AS Utf8) LIKE %a% AND CAST(largebinary_col@1 AS Utf8) LIKE %a% AND CAST(binaryview_col@2 AS Utf8) LIKE %a% +04)------ParquetExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet/binary_as_string.parquet]]}, projection=[binary_col, largebinary_col, binaryview_col], predicate=CAST(binary_col@0 AS Utf8View) LIKE %a% AND CAST(largebinary_col@1 AS Utf8View) LIKE %a% AND CAST(binaryview_col@2 AS Utf8View) LIKE %a% statement ok @@ -432,15 +432,15 @@ select arrow_typeof(binaryview_col), binaryview_col FROM binary_as_string_option; ---- -Utf8 aaa Utf8 aaa Utf8 aaa -Utf8 bbb Utf8 bbb Utf8 bbb -Utf8 ccc Utf8 ccc Utf8 
ccc -Utf8 ddd Utf8 ddd Utf8 ddd -Utf8 eee Utf8 eee Utf8 eee -Utf8 fff Utf8 fff Utf8 fff -Utf8 ggg Utf8 ggg Utf8 ggg -Utf8 hhh Utf8 hhh Utf8 hhh -Utf8 iii Utf8 iii Utf8 iii +Utf8View aaa Utf8View aaa Utf8View aaa +Utf8View bbb Utf8View bbb Utf8View bbb +Utf8View ccc Utf8View ccc Utf8View ccc +Utf8View ddd Utf8View ddd Utf8View ddd +Utf8View eee Utf8View eee Utf8View eee +Utf8View fff Utf8View fff Utf8View fff +Utf8View ggg Utf8View ggg Utf8View ggg +Utf8View hhh Utf8View hhh Utf8View hhh +Utf8View iii Utf8View iii Utf8View iii # Run an explain plan to show the cast happens in the plan (there should be no casts) query TT @@ -453,8 +453,8 @@ EXPLAIN binaryview_col LIKE '%a%'; ---- logical_plan -01)Filter: binary_as_string_option.binary_col LIKE Utf8("%a%") AND binary_as_string_option.largebinary_col LIKE Utf8("%a%") AND binary_as_string_option.binaryview_col LIKE Utf8("%a%") -02)--TableScan: binary_as_string_option projection=[binary_col, largebinary_col, binaryview_col], partial_filters=[binary_as_string_option.binary_col LIKE Utf8("%a%"), binary_as_string_option.largebinary_col LIKE Utf8("%a%"), binary_as_string_option.binaryview_col LIKE Utf8("%a%")] +01)Filter: binary_as_string_option.binary_col LIKE Utf8View("%a%") AND binary_as_string_option.largebinary_col LIKE Utf8View("%a%") AND binary_as_string_option.binaryview_col LIKE Utf8View("%a%") +02)--TableScan: binary_as_string_option projection=[binary_col, largebinary_col, binaryview_col], partial_filters=[binary_as_string_option.binary_col LIKE Utf8View("%a%"), binary_as_string_option.largebinary_col LIKE Utf8View("%a%"), binary_as_string_option.binaryview_col LIKE Utf8View("%a%")] physical_plan 01)CoalesceBatchesExec: target_batch_size=8192 02)--FilterExec: binary_col@0 LIKE %a% AND largebinary_col@1 LIKE %a% AND binaryview_col@2 LIKE %a% @@ -549,3 +549,52 @@ FixedSizeBinary(16) 0166ce1d46129ad104fa4990c6057c91 statement ok DROP TABLE test_non_utf8_binary; + + +## Tests for 
https://github.com/apache/datafusion/issues/13186 +statement ok +create table cpu (time timestamp, usage_idle float, usage_user float, cpu int); + +statement ok +insert into cpu values ('1970-01-01 00:00:00', 1.0, 2.0, 3); + +# must put it into a parquet file to get statistics +statement ok +copy (select * from cpu) to 'test_files/scratch/parquet/cpu.parquet'; + +# Run queries against parquet files +statement ok +create external table cpu_parquet +stored as parquet +location 'test_files/scratch/parquet/cpu.parquet'; + +# Double filtering +# +# Expect 1 row for both queries +query PI +select time, rn +from ( + select time, row_number() OVER (ORDER BY usage_idle, time) as rn + from cpu + where cpu = 3 +) where rn > 0; +---- +1970-01-01T00:00:00 1 + +query PI +select time, rn +from ( + select time, row_number() OVER (ORDER BY usage_idle, time) as rn + from cpu_parquet + where cpu = 3 +) where rn > 0; +---- +1970-01-01T00:00:00 1 + + +# Clean up +statement ok +drop table cpu; + +statement ok +drop table cpu_parquet; diff --git a/datafusion/sqllogictest/test_files/pg_compat/pg_compat_types.slt b/datafusion/sqllogictest/test_files/pg_compat/pg_compat_types.slt index b7497429fa95..7e315a448b48 100644 --- a/datafusion/sqllogictest/test_files/pg_compat/pg_compat_types.slt +++ b/datafusion/sqllogictest/test_files/pg_compat/pg_compat_types.slt @@ -24,18 +24,18 @@ NULL query TT select 'a'::VARCHAR, ''::VARCHAR ---- -a (empty) +a (empty) skipif postgres query TT select 'a'::CHAR, ''::CHAR ---- -a (empty) +a (empty) query TT select 'a'::TEXT, ''::TEXT ---- -a (empty) +a (empty) skipif postgres query I diff --git a/datafusion/sqllogictest/test_files/prepare.slt b/datafusion/sqllogictest/test_files/prepare.slt index ce4b7217f990..e306ec7767c7 100644 --- a/datafusion/sqllogictest/test_files/prepare.slt +++ b/datafusion/sqllogictest/test_files/prepare.slt @@ -80,3 +80,21 @@ PREPARE my_plan(INT, DOUBLE, DOUBLE, DOUBLE) AS SELECT id, SUM(age) FROM person statement error PREPARE 
my_plan(STRING, STRING) AS SELECT * FROM (VALUES(1, $1), (2, $2)) AS t (num, letter); + +# test creating logical plan for EXECUTE statements +query TT +EXPLAIN EXECUTE my_plan; +---- +logical_plan Execute: my_plan params=[] + +query TT +EXPLAIN EXECUTE my_plan(10*2 + 1, 'Foo'); +---- +logical_plan Execute: my_plan params=[Int64(21), Utf8("Foo")] + +query error DataFusion error: Schema error: No field named a\. +EXPLAIN EXECUTE my_plan(a); + +# TODO: support EXECUTE queries +query error DataFusion error: This feature is not implemented: Unsupported logical plan: Execute +EXECUTE my_plan; diff --git a/datafusion/sqllogictest/test_files/select.slt b/datafusion/sqllogictest/test_files/select.slt index f2ab4135aaa7..c096f6e692af 100644 --- a/datafusion/sqllogictest/test_files/select.slt +++ b/datafusion/sqllogictest/test_files/select.slt @@ -459,7 +459,7 @@ VALUES (-1) query IIB VALUES (2+1,2-1,2>1) ---- -3 1 true +3 1 true # multiple rows values query I rowsort @@ -472,8 +472,8 @@ VALUES (1),(2) query IT rowsort VALUES (1,'a'),(2,'b') ---- -1 a -2 b +1 a +2 b # table foo for distinct order by statement ok @@ -1439,7 +1439,7 @@ logical_plan 02)--Filter: annotated_data_finite2.a = Int32(0) 03)----TableScan: annotated_data_finite2 projection=[a0, a, b, c, d], partial_filters=[annotated_data_finite2.a = Int32(0)] physical_plan -01)SortPreservingMergeExec: [b@2 ASC NULLS LAST,c@3 ASC NULLS LAST] +01)SortPreservingMergeExec: [b@2 ASC NULLS LAST, c@3 ASC NULLS LAST] 02)--CoalesceBatchesExec: target_batch_size=8192 03)----FilterExec: a@1 = 0 04)------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1 @@ -1481,7 +1481,7 @@ logical_plan 02)--Filter: annotated_data_finite2.a = Int32(0) AND annotated_data_finite2.b = Int32(0) 03)----TableScan: annotated_data_finite2 projection=[a0, a, b, c, d], partial_filters=[annotated_data_finite2.a = Int32(0), annotated_data_finite2.b = Int32(0)] physical_plan -01)SortPreservingMergeExec: [b@2 ASC NULLS LAST,c@3 ASC NULLS 
LAST] +01)SortPreservingMergeExec: [b@2 ASC NULLS LAST, c@3 ASC NULLS LAST] 02)--CoalesceBatchesExec: target_batch_size=8192 03)----FilterExec: a@1 = 0 AND b@2 = 0 04)------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1 @@ -1502,7 +1502,7 @@ logical_plan 02)--Filter: annotated_data_finite2.a = Int32(0) AND annotated_data_finite2.b = Int32(0) 03)----TableScan: annotated_data_finite2 projection=[a0, a, b, c, d], partial_filters=[annotated_data_finite2.a = Int32(0), annotated_data_finite2.b = Int32(0)] physical_plan -01)SortPreservingMergeExec: [a@1 ASC NULLS LAST,b@2 ASC NULLS LAST,c@3 ASC NULLS LAST] +01)SortPreservingMergeExec: [a@1 ASC NULLS LAST, b@2 ASC NULLS LAST, c@3 ASC NULLS LAST] 02)--CoalesceBatchesExec: target_batch_size=8192 03)----FilterExec: a@1 = 0 AND b@2 = 0 04)------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1 @@ -1553,7 +1553,7 @@ physical_plan 04)------AggregateExec: mode=Partial, gby=[c2@0 as c2], aggr=[count(*)] 05)--------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1 06)----------ProjectionExec: expr=[c2@0 as c2] -07)------------SortExec: TopK(fetch=4), expr=[c1@1 ASC NULLS LAST,c2@0 ASC NULLS LAST], preserve_partitioning=[false] +07)------------SortExec: TopK(fetch=4), expr=[c1@1 ASC NULLS LAST, c2@0 ASC NULLS LAST], preserve_partitioning=[false] 08)--------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c2, c1], has_header=true # FilterExec can track equality of non-column expressions. 
diff --git a/datafusion/sqllogictest/test_files/string/dictionary_utf8.slt b/datafusion/sqllogictest/test_files/string/dictionary_utf8.slt index c181f613ee9a..c43f3a4cc16b 100644 --- a/datafusion/sqllogictest/test_files/string/dictionary_utf8.slt +++ b/datafusion/sqllogictest/test_files/string/dictionary_utf8.slt @@ -37,6 +37,11 @@ select arrow_cast(col1, 'Dictionary(Int32, Utf8)') as c1 from test_substr_base; statement ok drop table test_source +query T +SELECT arrow_cast('', 'Dictionary(Int32, Utf8)'); +---- +(empty) + # TODO: move it back to `string_query.slt.part` after fixing the issue # see detail: https://github.com/apache/datafusion/issues/12637 # Test pattern with wildcard characters @@ -51,6 +56,10 @@ from test_basic_operator; Andrew datafusion📊🔥 true false true false Xiangpeng datafusion数据融合 false true false true Raphael datafusionДатаФусион false false false false +under_score un iść core false false false false +percent pan Tadeusz ma iść w kąt false false false false +(empty) (empty) false false false false +NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL # diff --git a/datafusion/sqllogictest/test_files/string/init_data.slt.part b/datafusion/sqllogictest/test_files/string/init_data.slt.part index 096e3bb3b330..e3914ea49855 100644 --- a/datafusion/sqllogictest/test_files/string/init_data.slt.part +++ b/datafusion/sqllogictest/test_files/string/init_data.slt.part @@ -20,6 +20,10 @@ create table test_source as values ('Andrew', 'X', 'datafusion📊🔥', '🔥'), ('Xiangpeng', 'Xiangpeng', 'datafusion数据融合', 'datafusion数据融合'), ('Raphael', 'R', 'datafusionДатаФусион', 'аФус'), + ('under_score', 'un_____core', 'un iść core', 'chrząszcz na łące w 東京都'), + ('percent', 'p%t', 'pan Tadeusz ma iść w kąt', 'Pan Tadeusz ma frunąć stąd w kąt'), + ('', '%', '', ''), + (NULL, '%', NULL, NULL), (NULL, 'R', NULL, '🔥'); # -------------------------------------- diff --git a/datafusion/sqllogictest/test_files/string/large_string.slt 
b/datafusion/sqllogictest/test_files/string/large_string.slt index 8d8a5711bdb8..1cf906d7dc75 100644 --- a/datafusion/sqllogictest/test_files/string/large_string.slt +++ b/datafusion/sqllogictest/test_files/string/large_string.slt @@ -41,6 +41,10 @@ SELECT ascii_1, ascii_2, unicode_1, unicode_2 FROM test_basic_operator Andrew X datafusion📊🔥 🔥 Xiangpeng Xiangpeng datafusion数据融合 datafusion数据融合 Raphael R datafusionДатаФусион аФус +under_score un_____core un iść core chrząszcz na łące w 東京都 +percent p%t pan Tadeusz ma iść w kąt Pan Tadeusz ma frunąć stąd w kąt +(empty) % (empty) (empty) +NULL % NULL NULL NULL R NULL 🔥 # TODO: move it back to `string_query.slt.part` after fixing the issue @@ -57,6 +61,10 @@ from test_basic_operator; Andrew datafusion📊🔥 true false true false Xiangpeng datafusion数据融合 false true false true Raphael datafusionДатаФусион false false false false +under_score un iść core false false false false +percent pan Tadeusz ma iść w kąt false false false false +(empty) (empty) false false false false +NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL # diff --git a/datafusion/sqllogictest/test_files/string/string.slt b/datafusion/sqllogictest/test_files/string/string.slt index e84342abd3df..9e97712b6871 100644 --- a/datafusion/sqllogictest/test_files/string/string.slt +++ b/datafusion/sqllogictest/test_files/string/string.slt @@ -48,6 +48,10 @@ from test_basic_operator; Andrew datafusion📊🔥 true false true false Xiangpeng datafusion数据融合 false true false true Raphael datafusionДатаФусион false false false false +under_score un iść core false false false false +percent pan Tadeusz ma iść w kąt false false false false +(empty) (empty) false false false false +NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL # @@ -55,6 +59,116 @@ NULL NULL NULL NULL NULL NULL # include ./string_query.slt.part +# TODO support all String types in sql_like_to_expr and move this test to `string_query.slt.part` +# dynamic LIKE as filter +query TTT rowsort 
+SELECT ascii_1, 'is LIKE', ascii_2 FROM test_basic_operator WHERE ascii_1 LIKE ascii_2 +UNION ALL +SELECT ascii_1, 'is NOT LIKE', ascii_2 FROM test_basic_operator WHERE ascii_1 NOT LIKE ascii_2 +UNION ALL +SELECT unicode_1, 'is LIKE', ascii_2 FROM test_basic_operator WHERE unicode_1 LIKE ascii_2 +UNION ALL +SELECT unicode_1, 'is NOT LIKE', ascii_2 FROM test_basic_operator WHERE unicode_1 NOT LIKE ascii_2 +UNION ALL +SELECT unicode_2, 'is LIKE', ascii_2 FROM test_basic_operator WHERE unicode_2 LIKE ascii_2 +UNION ALL +SELECT unicode_2, 'is NOT LIKE', ascii_2 FROM test_basic_operator WHERE unicode_2 NOT LIKE ascii_2 +---- +(empty) is LIKE % +(empty) is LIKE % +(empty) is LIKE % +Andrew is NOT LIKE X +Pan Tadeusz ma frunąć stąd w kąt is NOT LIKE p%t +Raphael is NOT LIKE R +Xiangpeng is LIKE Xiangpeng +chrząszcz na łące w 東京都 is NOT LIKE un_____core +datafusionДатаФусион is NOT LIKE R +datafusion数据融合 is NOT LIKE Xiangpeng +datafusion数据融合 is NOT LIKE Xiangpeng +datafusion📊🔥 is NOT LIKE X +pan Tadeusz ma iść w kąt is LIKE p%t +percent is LIKE p%t +un iść core is LIKE un_____core +under_score is LIKE un_____core +аФус is NOT LIKE R +🔥 is NOT LIKE R +🔥 is NOT LIKE X + +# TODO support all String types in sql_like_to_expr and move this test to `string_query.slt.part` +# dynamic LIKE as projection +query TTTTBBBB rowsort +SELECT + ascii_1, ascii_2, unicode_1, unicode_2, + (ascii_1 LIKE ascii_2) AS ascii_1_like_ascii_2, + (ascii_2 LIKE ascii_1) AS ascii_2_like_ascii_1, + (unicode_1 LIKE ascii_2) AS unicode_1_like_ascii_2, + (unicode_2 LIKE ascii_2) AS unicode_2_like_ascii_2 +FROM test_basic_operator +---- +(empty) % (empty) (empty) true false true true +Andrew X datafusion📊🔥 🔥 false false false false +NULL % NULL NULL NULL NULL NULL NULL +NULL R NULL 🔥 NULL NULL NULL false +Raphael R datafusionДатаФусион аФус false false false false +Xiangpeng Xiangpeng datafusion数据融合 datafusion数据融合 true true false false +percent p%t pan Tadeusz ma iść w kąt Pan Tadeusz ma frunąć stąd w kąt 
true false true false +under_score un_____core un iść core chrząszcz na łące w 東京都 true false true false + +# TODO support all String types in sql_like_to_expr and move this test to `string_query.slt.part` +# dynamic ILIKE as filter +query TTT rowsort +SELECT ascii_1, 'is ILIKE', ascii_2 FROM test_basic_operator WHERE ascii_1 ILIKE ascii_2 +UNION ALL +SELECT ascii_1, 'is NOT ILIKE', ascii_2 FROM test_basic_operator WHERE ascii_1 NOT ILIKE ascii_2 +UNION ALL +SELECT unicode_1, 'is ILIKE', ascii_2 FROM test_basic_operator WHERE unicode_1 ILIKE ascii_2 +UNION ALL +SELECT unicode_1, 'is NOT ILIKE', ascii_2 FROM test_basic_operator WHERE unicode_1 NOT ILIKE ascii_2 +UNION ALL +SELECT unicode_2, 'is ILIKE', ascii_2 FROM test_basic_operator WHERE unicode_2 ILIKE ascii_2 +UNION ALL +SELECT unicode_2, 'is NOT ILIKE', ascii_2 FROM test_basic_operator WHERE unicode_2 NOT ILIKE ascii_2 +---- +(empty) is ILIKE % +(empty) is ILIKE % +(empty) is ILIKE % +Andrew is NOT ILIKE X +Pan Tadeusz ma frunąć stąd w kąt is ILIKE p%t +Raphael is NOT ILIKE R +Xiangpeng is ILIKE Xiangpeng +chrząszcz na łące w 東京都 is NOT ILIKE un_____core +datafusionДатаФусион is NOT ILIKE R +datafusion数据融合 is NOT ILIKE Xiangpeng +datafusion数据融合 is NOT ILIKE Xiangpeng +datafusion📊🔥 is NOT ILIKE X +pan Tadeusz ma iść w kąt is ILIKE p%t +percent is ILIKE p%t +un iść core is ILIKE un_____core +under_score is ILIKE un_____core +аФус is NOT ILIKE R +🔥 is NOT ILIKE R +🔥 is NOT ILIKE X + +# TODO support all String types in sql_like_to_expr and move this test to `string_query.slt.part` +# dynamic ILIKE as projection +query TTTTBBBB rowsort +SELECT + ascii_1, ascii_2, unicode_1, unicode_2, + (ascii_1 ILIKE ascii_2) AS ascii_1_ilike_ascii_2, + (ascii_2 ILIKE ascii_1) AS ascii_2_ilike_ascii_1, + (unicode_1 ILIKE ascii_2) AS unicode_1_ilike_ascii_2, + (unicode_2 ILIKE ascii_2) AS unicode_2_ilike_ascii_2 +FROM test_basic_operator +---- +(empty) % (empty) (empty) true false true true +Andrew X datafusion📊🔥 🔥 false false 
false false +NULL % NULL NULL NULL NULL NULL NULL +NULL R NULL 🔥 NULL NULL NULL false +Raphael R datafusionДатаФусион аФус false false false false +Xiangpeng Xiangpeng datafusion数据融合 datafusion数据融合 true true false false +percent p%t pan Tadeusz ma iść w kąt Pan Tadeusz ma frunąć stąd w kąt true false true true +under_score un_____core un iść core chrząszcz na łące w 東京都 true false true false + # # Clean up # diff --git a/datafusion/sqllogictest/test_files/string/string_literal.slt b/datafusion/sqllogictest/test_files/string/string_literal.slt index 5d847747693d..80bd7fc59c00 100644 --- a/datafusion/sqllogictest/test_files/string/string_literal.slt +++ b/datafusion/sqllogictest/test_files/string/string_literal.slt @@ -816,3 +816,8 @@ query B SELECT starts_with('foobar', 'bar') ---- false + +query TT +select ' ', '|' +---- + | diff --git a/datafusion/sqllogictest/test_files/string/string_query.slt.part b/datafusion/sqllogictest/test_files/string/string_query.slt.part index dc5626b7d573..c4975b5b8c8d 100644 --- a/datafusion/sqllogictest/test_files/string/string_query.slt.part +++ b/datafusion/sqllogictest/test_files/string/string_query.slt.part @@ -26,6 +26,10 @@ SELECT ascii_1, ascii_2, unicode_1, unicode_2 FROM test_basic_operator Andrew X datafusion📊🔥 🔥 Xiangpeng Xiangpeng datafusion数据融合 datafusion数据融合 Raphael R datafusionДатаФусион аФус +under_score un_____core un iść core chrząszcz na łące w 東京都 +percent p%t pan Tadeusz ma iść w kąt Pan Tadeusz ma frunąć stąd w kąt +(empty) % (empty) (empty) +NULL % NULL NULL NULL R NULL 🔥 # -------------------------------------- @@ -42,21 +46,28 @@ select ascii_1, ascii_2 from test_basic_operator where ascii_1 <> ascii_2 ---- Andrew X Raphael R +under_score un_____core +percent p%t +(empty) % query TT select unicode_1, unicode_2 from test_basic_operator where unicode_1 = unicode_2 ---- datafusion数据融合 datafusion数据融合 +(empty) (empty) query TT select unicode_1, unicode_2 from test_basic_operator where unicode_1 <> unicode_2 ---- 
datafusion📊🔥 🔥 datafusionДатаФусион аФус +un iść core chrząszcz na łące w 東京都 +pan Tadeusz ma iść w kąt Pan Tadeusz ma frunąć stąd w kąt query TT select ascii_1, unicode_1 from test_basic_operator where ascii_1 = unicode_1 ---- +(empty) (empty) query TT select ascii_1, unicode_1 from test_basic_operator where ascii_1 <> unicode_1 @@ -64,6 +75,8 @@ select ascii_1, unicode_1 from test_basic_operator where ascii_1 <> unicode_1 Andrew datafusion📊🔥 Xiangpeng datafusion数据融合 Raphael datafusionДатаФусион +under_score un iść core +percent pan Tadeusz ma iść w kąt # -------------------------------------- # column comparison @@ -82,6 +95,10 @@ from test_basic_operator; Andrew X datafusion📊🔥 🔥 false true false true false true Xiangpeng Xiangpeng datafusion数据融合 datafusion数据融合 true false true false false true Raphael R datafusionДатаФусион аФус false true false true false true +under_score un_____core un iść core chrząszcz na łące w 東京都 false true false true false true +percent p%t pan Tadeusz ma iść w kąt Pan Tadeusz ma frunąć stąd w kąt false true false true false true +(empty) % (empty) (empty) false true true false true false +NULL % NULL NULL NULL NULL NULL NULL NULL NULL NULL R NULL 🔥 NULL NULL NULL NULL NULL NULL # -------------------------------------- @@ -99,6 +116,10 @@ from test_basic_operator; Andrew datafusion📊🔥 true false false true Xiangpeng datafusion数据融合 false true true false Raphael datafusionДатаФусион false true false true +under_score un iść core false true false true +percent pan Tadeusz ma iść w kąt false true false true +(empty) (empty) false true false true +NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL # -------------------------------------- @@ -116,6 +137,10 @@ from test_basic_operator; Andrew datafusion📊🔥 true false false true Xiangpeng datafusion数据融合 false true true false Raphael datafusionДатаФусион false true false true +under_score un iść core false true false true +percent pan Tadeusz ma iść w kąt false true false true +(empty) 
(empty) false true false true +NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL # -------------------------------------- @@ -133,6 +158,10 @@ from test_basic_operator; Andrew datafusion📊🔥 true false false true Xiangpeng datafusion数据融合 false true true false Raphael datafusionДатаФусион false true false true +under_score un iść core false true false true +percent pan Tadeusz ma iść w kąt false true false true +(empty) (empty) false true false true +NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL # -------------------------------------- @@ -175,6 +204,10 @@ FROM test_basic_operator; And X dat 🔥 Xia Xia dat dat Rap R dat аФу +und un_ un chr +per p%t pan Pan +(empty) % (empty) (empty) +NULL % NULL NULL NULL R NULL 🔥 # -------------------------------------- @@ -187,7 +220,7 @@ SELECT FROM test_basic_operator ---- -3 3 +6 6 query II SELECT @@ -200,8 +233,11 @@ GROUP BY ascii_2; 1 1 1 1 1 1 +1 1 +1 1 +1 1 -query II +query II rowsort SELECT COUNT(DISTINCT ascii_1), COUNT(DISTINCT unicode_1) @@ -209,6 +245,10 @@ FROM test_basic_operator GROUP BY unicode_2; ---- +0 0 +1 1 +1 1 +1 1 1 1 1 1 1 1 @@ -228,6 +268,10 @@ FROM test_basic_operator false false false false true true false false true false false false +false false false false +false false false false +false true true false +NULL NULL NULL NULL NULL NULL NULL NULL query BBBB @@ -241,6 +285,10 @@ FROM test_basic_operator true false true false false false true true false false true false +false false false false +false false false false +false false false false +NULL false NULL NULL NULL false NULL false # -------------------------------------- @@ -255,6 +303,10 @@ FROM test_basic_operator; Andrew Xiangpeng Raphael +under_scrre +percent +(empty) +NULL NULL query T @@ -265,6 +317,10 @@ FROM test_basic_operator; databusirn📊🔥 databusirn数据融合 databusirnДатаФусион +un iść crre +pan Tadeusz ma iść w kąt +(empty) +NULL NULL # -------------------------------------- @@ -280,6 +336,10 @@ FROM 
test_basic_operator; Andrfw Xiangpfng Raphafl +undfr_score +pfrcent +(empty) +NULL NULL # Should run REGEXP_REPLACE with Scalar value for string with flag @@ -291,6 +351,10 @@ FROM test_basic_operator; Andrfw Xiangpfng Raphafl +undfr_score +pfrcent +(empty) +NULL NULL # Should run REGEXP_REPLACE with ScalarArray value for string @@ -302,6 +366,10 @@ FROM test_basic_operator; Andrew Xiangpeng Raphael +bar +bar +bar +NULL NULL # Should run REGEXP_REPLACE with ScalarArray value for string with flag @@ -313,6 +381,10 @@ FROM test_basic_operator; Andrew Xiangpeng Raphael +bar +bar +bar +NULL NULL # -------------------------------------- @@ -333,6 +405,10 @@ FROM test_lowercase; Andrew Datafusion📊🔥 Xiangpeng Datafusion数据融合 Raphael Datafusionдатафусион +Under_Score Un Iść Core +Percent Pan Tadeusz Ma Iść W KąT +(empty) (empty) +NULL NULL NULL NULL statement ok @@ -353,6 +429,10 @@ FROM test_basic_operator; 65 88 100 128293 88 88 100 100 82 82 100 1072 +117 117 117 99 +112 112 112 80 +0 37 0 0 +NULL 37 NULL NULL NULL 82 NULL 128293 # -------------------------------------- @@ -373,6 +453,10 @@ FROM test_basic_operator; Andrew ndrew NULL datafusion📊🔥 datafusion📊 NULL Xiangpeng Xiangpeng NULL datafusion数据融合 datafusion数据融合 NULL Raphael Raphael NULL datafusionДатаФусион datafusionДатаФусион NULL +under_score under_score NULL un iść core un iść core NULL +percent percent NULL pan Tadeusz ma iść w kąt pan Tadeusz ma iść w kąt NULL +(empty) (empty) NULL (empty) (empty) NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL # -------------------------------------- @@ -393,6 +477,10 @@ FROM test_basic_operator; Andrew Andrew NULL datafusion📊🔥 NULL datafusion📊🔥 Xiangpeng (empty) NULL datafusion数据融合 NULL datafusion数据融合 Raphael aphael NULL datafusionДатаФусион NULL datafusionДатаФусион +under_score der_score NULL un iść core NULL un iść core +percent ercent NULL pan Tadeusz ma iść w kąt NULL pan Tadeusz ma iść w kąt +(empty) (empty) NULL (empty) NULL (empty) +NULL NULL NULL 
NULL NULL NULL NULL NULL NULL NULL NULL NULL # -------------------------------------- @@ -412,6 +500,10 @@ FROM test_basic_operator; And Andrew Andrew NULL datafusion📊 Xiangpeng (empty) Xiangpeng NULL datafusion数据融合 Raphael Raphael Raphael NULL datafusionДатаФусион +under_sco under_s under_score NULL un iść core +percent percen percent NULL pan Tadeusz ma iść w kąt +(empty) (empty) (empty) NULL (empty) +NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL # -------------------------------------- @@ -431,6 +523,10 @@ FROM test_basic_operator; false false NULL true NULL true false true NULL true NULL false false true NULL true NULL false +false false NULL false NULL false +false false NULL false NULL false +false false NULL true NULL false +NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL # -------------------------------------- @@ -443,6 +539,10 @@ SELECT LOWER(ascii_1) as c1, LOWER(unicode_1) as c2 FROM test_basic_operator; andrew datafusion📊🔥 xiangpeng datafusion数据融合 raphael datafusionдатафусион +under_score un iść core +percent pan tadeusz ma iść w kąt +(empty) (empty) +NULL NULL NULL NULL # -------------------------------------- @@ -455,6 +555,10 @@ SELECT UPPER(ascii_1) as c1, UPPER(unicode_1) as c2 FROM test_basic_operator; ANDREW DATAFUSION📊🔥 XIANGPENG DATAFUSION数据融合 RAPHAEL DATAFUSIONДАТАФУСИОН +UNDER_SCORE UN IŚĆ CORE +PERCENT PAN TADEUSZ MA IŚĆ W KĄT +(empty) (empty) +NULL NULL NULL NULL # -------------------------------------- @@ -480,6 +584,10 @@ FROM test_basic_operator; Andrew:Data AndrewX Andrew Andrewdatafusion📊🔥 Andrew🔥 datafusion📊🔥Andrew datafusion📊🔥🔥 datafusion📊🔥 datafusion📊🔥🔥 🔥 (empty) Andrew,datafusion📊🔥 Xiangpeng:Data XiangpengXiangpeng Xiangpeng Xiangpengdatafusion数据融合 Xiangpengdatafusion数据融合 datafusion数据融合Xiangpeng datafusion数据融合datafusion数据融合 datafusion数据融合 datafusion数据融合🔥 🔥 (empty) Xiangpeng,datafusion数据融合 Raphael:Data RaphaelR Raphael RaphaeldatafusionДатаФусион RaphaelаФус datafusionДатаФусионRaphael datafusionДатаФусионаФус 
datafusionДатаФусион datafusionДатаФусион🔥 🔥 (empty) Raphael,datafusionДатаФусион +under_score:Data under_scoreun_____core under_score under_scoreun iść core under_scorechrząszcz na łące w 東京都 un iść coreunder_score un iść corechrząszcz na łące w 東京都 un iść core un iść core🔥 🔥 (empty) under_score,un iść core +percent:Data percentp%t percent percentpan Tadeusz ma iść w kąt percentPan Tadeusz ma frunąć stąd w kąt pan Tadeusz ma iść w kątpercent pan Tadeusz ma iść w kątPan Tadeusz ma frunąć stąd w kąt pan Tadeusz ma iść w kąt pan Tadeusz ma iść w kąt🔥 🔥 (empty) percent,pan Tadeusz ma iść w kąt +:Data % (empty) (empty) (empty) (empty) (empty) (empty) 🔥 🔥 (empty) , +:Data % (empty) (empty) (empty) (empty) (empty) (empty) 🔥 🔥 (empty) , :Data R (empty) (empty) 🔥 (empty) 🔥 (empty) 🔥 🔥 (empty) , # -------------------------------------- @@ -499,6 +607,10 @@ FROM test_basic_operator; Afooew dfoofusion📊🔥 A🔥drew d🔥tafusion📊🔥 NULL NULL Xfoogpeng dfoofusion数据融合 X🔥angpeng d🔥tafusion数据融合 NULL NULL Rfooael dfoofusionДатаФусион R🔥phael d🔥tafusionДатаФусион NULL NULL +ufoor_score ufoość core u🔥der_score u🔥 iść core NULL NULL +pfooent pfooTadeusz ma iść w kąt p🔥rcent p🔥n Tadeusz ma iść w kąt NULL NULL +foo foo 🔥 🔥 NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL # -------------------------------------- @@ -518,6 +630,10 @@ FROM test_basic_operator; Andrew Andrew NULL datafusion📊bar NULL datafusion📊bar Xiangpeng bar NULL bar NULL datafusion数据融合 Raphael baraphael NULL datafusionДатbarион NULL datafusionДатаФусион +under_score under_score NULL un iść core NULL un iść core +percent percent NULL pan Tadeusz ma iść w kąt NULL pan Tadeusz ma iść w kąt +(empty) (empty) NULL bar NULL (empty) +NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL # -------------------------------------- @@ -537,6 +653,10 @@ FROM test_basic_operator; rew (empty) rew n📊🔥 (empty) afusion📊🔥 eng (empty) ngpeng 据融合 (empty) afusion数据融合 ael (empty) hael ион (empty) afusionДатаФусион +ore 
(empty) er_score ore (empty) iść core +ent (empty) cent kąt (empty) Tadeusz ma iść w kąt +(empty) (empty) (empty) (empty) (empty) (empty) +NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL # -------------------------------------- @@ -557,6 +677,10 @@ FROM test_basic_operator; And (empty) And dat (empty) datafusio Xia (empty) Xiangp dat (empty) datafusion数 Rap (empty) Raph dat (empty) datafusionДатаФус +und (empty) under_sc un (empty) un iść c +per (empty) perc pan (empty) pan Tadeusz ma iść w +(empty) (empty) (empty) (empty) (empty) (empty) +NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL # -------------------------------------- @@ -574,6 +698,10 @@ FROM test_basic_operator; Andrew Andrew datafusion📊🔥 datafusion📊🔥 Xi Xiangpeng datafusion数据融合 datafusion数据融合 R Raph datafusionД datafusionДат +under_score under_score un iść core un iść core +percent percent pan Tadeusz ma iść w kąt pan Tadeusz ma iść w kąt +(empty) (empty) (empty) (empty) +NULL NULL NULL NULL NULL NULL NULL NULL # -------------------------------------- @@ -591,6 +719,10 @@ FROM test_basic_operator; 0 1 0 1 0 2 0 2 0 3 0 3 +0 0 0 0 +0 0 0 0 +0 0 0 0 +NULL NULL NULL NULL NULL NULL NULL NULL # -------------------------------------- @@ -610,6 +742,10 @@ FROM test_basic_operator; Andrewfoo Andrew🔥 datafusion📊🔥foo datafusion📊🔥🔥 Xiangpengfoo Xiangpeng🔥 datafusion数据融合foo datafusion数据融合🔥 Raphaelfoo Raphael🔥 datafusionДатаФусионfoo datafusionДатаФусион🔥 +under_scorefoo under_score🔥 un iść corefoo un iść core🔥 +percentfoo percent🔥 pan Tadeusz ma iść w kątfoo pan Tadeusz ma iść w kąt🔥 +foo 🔥 foo 🔥 +NULL NULL NULL NULL NULL NULL NULL NULL # || same type (column1 has null, so also tests NULL || NULL) @@ -625,6 +761,10 @@ FROM test_basic_operator; AndrewX Andrew🔥 datafusion📊🔥X datafusion📊🔥🔥 XiangpengXiangpeng Xiangpengdatafusion数据融合 datafusion数据融合Xiangpeng datafusion数据融合datafusion数据融合 RaphaelR RaphaelаФус datafusionДатаФусионR datafusionДатаФусионаФус +under_scoreun_____core under_scorechrząszcz 
na łące w 東京都 un iść coreun_____core un iść corechrząszcz na łące w 東京都 +percentp%t percentPan Tadeusz ma frunąć stąd w kąt pan Tadeusz ma iść w kątp%t pan Tadeusz ma iść w kątPan Tadeusz ma frunąć stąd w kąt +% (empty) % (empty) +NULL NULL NULL NULL NULL NULL NULL NULL # -------------------------------------- @@ -640,6 +780,10 @@ FROM test_basic_operator; false false true false false true +false false +false false +false false +NULL NULL NULL NULL query BB @@ -651,6 +795,10 @@ FROM test_basic_operator; true false false false false true +false false +false false +false false +NULL NULL NULL NULL query BB @@ -662,6 +810,10 @@ FROM test_basic_operator; true true true false true true +true true +true true +true true +NULL NULL NULL NULL query BB @@ -673,6 +825,10 @@ FROM test_basic_operator; true true false false true true +true true +true true +true true +NULL NULL NULL NULL # -------------------------------------- @@ -691,6 +847,10 @@ from test_basic_operator; Andrew nice Andrew and X datafusion📊🔥 cool datafusion📊🔥 and 🔥 Andrew 🔥 datafusion📊🔥 Xiangpeng nice Xiangpeng and Xiangpeng datafusion数据融合 cool datafusion数据融合 and datafusion数据融合 Xiangpeng 🔥 datafusion数据融合 Raphael nice Raphael and R datafusionДатаФусион cool datafusionДатаФусион and аФус Raphael 🔥 datafusionДатаФусион +under_score nice under_score and un_____core un iść core cool un iść core and chrząszcz na łące w 東京都 under_score 🔥 un iść core +percent nice percent and p%t pan Tadeusz ma iść w kąt cool pan Tadeusz ma iść w kąt and Pan Tadeusz ma frunąć stąd w kąt percent 🔥 pan Tadeusz ma iść w kąt + nice and % cool and 🔥 +NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL # -------------------------------------- @@ -725,6 +885,10 @@ from test_basic_operator; Andrew datafusion📊🔥 false false false false Xiangpeng datafusion数据融合 false false false false Raphael datafusionДатаФусион false false false false +under_score un iść core false false false false +percent pan Tadeusz ma iść w kąt false false false false 
+(empty) (empty) false false false false +NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL # -------------------------------------- @@ -741,6 +905,10 @@ FROM 6 12 9 14 7 20 +11 11 +7 24 +0 0 +NULL NULL NULL NULL # -------------------------------------- @@ -758,6 +926,10 @@ FROM test_basic_operator; true true NULL NULL false true NULL NULL false true NULL NULL +false false NULL NULL +false false NULL NULL +false false NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL # -------------------------------------- @@ -775,6 +947,10 @@ FROM test_basic_operator; true false NULL NULL false false NULL NULL false true NULL NULL +false false NULL NULL +false false NULL NULL +false false NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL # -------------------------------------- @@ -792,6 +968,10 @@ FROM test_basic_operator; 0 4 NULL NULL 7 0 NULL NULL 6 10 NULL NULL +8 13 NULL NULL +6 19 NULL NULL +6 14 NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL # -------------------------------------- @@ -809,18 +989,27 @@ FROM test_basic_operator; xxxxxxxxxxxxxxAndrew NULL 🔥🔥🔥🔥🔥🔥🔥🔥datafusion📊🔥 NULL xxxxxxxxxxxXiangpeng NULL 🔥🔥🔥🔥🔥🔥datafusion数据融合 NULL xxxxxxxxxxxxxRaphael NULL datafusionДатаФусион NULL +xxxxxxxxxunder_score NULL 🔥🔥🔥🔥🔥🔥🔥🔥🔥un iść core NULL +xxxxxxxxxxxxxpercent NULL pan Tadeusz ma iść w NULL +xxxxxxxxxxxxxxxxxxxx NULL 🔥🔥🔥🔥🔥🔥🔥🔥🔥🔥🔥🔥🔥🔥🔥🔥🔥🔥🔥🔥 NULL +NULL NULL NULL NULL NULL NULL NULL NULL -query TT +query TTT SELECT LPAD(ascii_1, 20), - LPAD(unicode_1, 20) + LPAD(unicode_1, 20), + '|' FROM test_basic_operator; ---- - Andrew datafusion📊🔥 - Xiangpeng datafusion数据融合 - Raphael datafusionДатаФусион -NULL NULL + Andrew datafusion📊🔥 | + Xiangpeng datafusion数据融合 | + Raphael datafusionДатаФусион | + under_score un iść core | + percent pan Tadeusz ma iść w | + | +NULL NULL | +NULL NULL | # -------------------------------------- # Test RPAD @@ -837,6 +1026,10 @@ FROM test_basic_operator; Andrewxxxxxxxxxxxxxx NULL datafusion📊🔥🔥🔥🔥🔥🔥🔥🔥🔥 NULL Xiangpengxxxxxxxxxxx NULL 
datafusion数据融合🔥🔥🔥🔥🔥🔥 NULL Raphaelxxxxxxxxxxxxx NULL datafusionДатаФусион NULL +under_scorexxxxxxxxx NULL un iść core🔥🔥🔥🔥🔥🔥🔥🔥🔥 NULL +percentxxxxxxxxxxxxx NULL pan Tadeusz ma iść w NULL +xxxxxxxxxxxxxxxxxxxx NULL 🔥🔥🔥🔥🔥🔥🔥🔥🔥🔥🔥🔥🔥🔥🔥🔥🔥🔥🔥🔥 NULL +NULL NULL NULL NULL NULL NULL NULL NULL query TT @@ -845,9 +1038,13 @@ SELECT RPAD(unicode_1, 20) FROM test_basic_operator; ---- -Andrew datafusion📊🔥 -Xiangpeng datafusion数据融合 -Raphael datafusionДатаФусион +Andrew datafusion📊🔥 +Xiangpeng datafusion数据融合 +Raphael datafusionДатаФусион +under_score un iść core +percent pan Tadeusz ma iść w + +NULL NULL NULL NULL # -------------------------------------- @@ -871,6 +1068,10 @@ SELECT false false NULL NULL true false NULL NULL true false NULL NULL true false NULL NULL false true NULL NULL false true NULL NULL +false false NULL NULL false false NULL NULL +false false NULL NULL false false NULL NULL +false false NULL NULL false false NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL # -------------------------------------- @@ -895,6 +1096,10 @@ NULL NULL NULL NULL [An] NULL NULL NULL [an] NULL NULL NULL [an] NULL NULL NULL NULL NULL NULL NULL NULL [таФ] NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL # -------------------------------------- # Test REPEAT @@ -909,6 +1114,10 @@ FROM test_basic_operator; AndrewAndrewAndrew datafusion📊🔥datafusion📊🔥datafusion📊🔥 XiangpengXiangpengXiangpeng datafusion数据融合datafusion数据融合datafusion数据融合 RaphaelRaphaelRaphael datafusionДатаФусионdatafusionДатаФусионdatafusionДатаФусион +under_scoreunder_scoreunder_score un iść coreun iść coreun iść core +percentpercentpercent pan Tadeusz ma iść w kątpan Tadeusz ma iść w kątpan Tadeusz ma iść w kąt +(empty) (empty) +NULL NULL NULL NULL # -------------------------------------- @@ -928,6 +1137,10 @@ FROM 
test_basic_operator; Andr w NULL datafusion📊🔥 (empty) NULL Xiangp ng NULL datafusion数据融合 (empty) NULL Rapha l NULL datafusionДатаФус он NULL +und r_scor NULL un iść core (empty) NULL +p rc NULL pan Tadeusz ma iść w kąt (empty) NULL +(empty) (empty) NULL (empty) (empty) NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL # -------------------------------------- @@ -943,6 +1156,10 @@ FROM test_basic_operator; werdnA 🔥📊noisufatad gnepgnaiX 合融据数noisufatad leahpaR ноисуФатаДnoisufatad +erocs_rednu eroc ćśi nu +tnecrep tąk w ćśi am zsuedaT nap +(empty) (empty) +NULL NULL NULL NULL # -------------------------------------- @@ -962,6 +1179,10 @@ FROM test_basic_operator; 5 0 NULL 0 0 NULL 7 3 NULL 0 0 NULL 6 0 NULL 18 18 NULL +4 0 NULL 0 0 NULL +2 0 NULL 0 0 NULL +0 0 NULL 0 0 NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL # -------------------------------------- @@ -981,4 +1202,8 @@ FROM test_basic_operator; Andr Andrew NULL datafusion📊🔥 datafusion📊🔥 NULL Xiangp Xi NULL datafusion数据融合 datafusion数 NULL Rapha Raphael NULL datafusionДатаФус datafusionДатаФусион NULL +und under_score NULL un iść core un iść core NULL +p percent NULL pan Tadeusz ma iść w kąt pan Tadeusz ma iść w kąt NULL +(empty) (empty) NULL (empty) (empty) NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL diff --git a/datafusion/sqllogictest/test_files/string/string_view.slt b/datafusion/sqllogictest/test_files/string/string_view.slt index 997dca719147..43b08cb25f3f 100644 --- a/datafusion/sqllogictest/test_files/string/string_view.slt +++ b/datafusion/sqllogictest/test_files/string/string_view.slt @@ -60,6 +60,7 @@ create table test_source as values ('Andrew', 'X'), ('Xiangpeng', 'Xiangpeng'), ('Raphael', 'R'), + ('', 'Warsaw'), (NULL, 'R'); # Table with the different combination of column types @@ -89,6 +90,7 @@ select octet_length(column1_utf8view) from test; 6 9 7 +0 NULL query error DataFusion error: Arrow error: Compute error: bit_length not supported for 
Utf8View @@ -100,6 +102,7 @@ select btrim(column1_large_utf8) from test; Andrew Xiangpeng Raphael +(empty) NULL ######## @@ -119,6 +122,7 @@ from test; Andrew X false false true true Xiangpeng Xiangpeng true true false false Raphael R false false true true +(empty) Warsaw false false true true NULL R NULL NULL NULL NULL # test StringViewArray with LargeUtf8 columns @@ -134,6 +138,7 @@ from test; Andrew X false false true true Xiangpeng Xiangpeng true true false false Raphael R false false true true +(empty) Warsaw false false true true NULL R NULL NULL NULL NULL ######## @@ -153,6 +158,7 @@ from test; Andrew X false false true true Xiangpeng Xiangpeng true true false false Raphael R false false true true +(empty) Warsaw false false true true NULL R NULL NULL NULL NULL # StringView column to Dict scalar @@ -168,6 +174,7 @@ from test; Andrew X true true false false Xiangpeng Xiangpeng false false true true Raphael R false false true true +(empty) Warsaw false false true true NULL R NULL NULL NULL NULL # Dict column to StringView scalar @@ -183,6 +190,7 @@ from test; Andrew X true true false false Xiangpeng Xiangpeng false false true true Raphael R false false true true +(empty) Warsaw false false true true NULL R NULL NULL NULL NULL ######## @@ -296,6 +304,7 @@ FROM test; false false false true true true true true true +false false false NULL NULL NULL # Test STARTS_WITH with utf8 against utf8view, utf8, and largeutf8 @@ -322,6 +331,7 @@ FROM test; false false false true true true true true true +false false false NULL NULL NULL @@ -702,6 +712,7 @@ FROM test; AndrewX XiangpengXiangpeng RaphaelR +Warsaw R ## Should run CONCAT successfully with utf8 utf8view and largeutf8 @@ -713,6 +724,7 @@ FROM test; AndrewXX XiangpengXiangpengXiangpeng RaphaelRR +WarsawWarsaw RR ## Ensure no casts for REGEXP_LIKE diff --git a/datafusion/sqllogictest/test_files/subquery.slt b/datafusion/sqllogictest/test_files/subquery.slt index 36de19f1c3aa..027b5ca8dcfb 100644 --- 
a/datafusion/sqllogictest/test_files/subquery.slt +++ b/datafusion/sqllogictest/test_files/subquery.slt @@ -1056,13 +1056,11 @@ where t1.t1_id > 40 or t1.t1_id in (select t2.t2_id from t2 where t1.t1_int > 0) ---- logical_plan 01)Projection: t1.t1_id, t1.t1_name, t1.t1_int -02)--Filter: t1.t1_id > Int32(40) OR __correlated_sq_1.__exists IS NOT NULL -03)----Projection: t1.t1_id, t1.t1_name, t1.t1_int, __correlated_sq_1.__exists -04)------Left Join: t1.t1_id = __correlated_sq_1.t2_id Filter: t1.t1_int > Int32(0) -05)--------TableScan: t1 projection=[t1_id, t1_name, t1_int] -06)--------SubqueryAlias: __correlated_sq_1 -07)----------Projection: t2.t2_id, Boolean(true) AS __exists -08)------------TableScan: t2 projection=[t2_id] +02)--Filter: t1.t1_id > Int32(40) OR __correlated_sq_1.mark +03)----LeftMark Join: t1.t1_id = __correlated_sq_1.t2_id Filter: t1.t1_int > Int32(0) +04)------TableScan: t1 projection=[t1_id, t1_name, t1_int] +05)------SubqueryAlias: __correlated_sq_1 +06)--------TableScan: t2 projection=[t2_id] query ITI rowsort select t1.t1_id, @@ -1085,13 +1083,12 @@ where t1.t1_id = 11 or t1.t1_id + 12 not in (select t2.t2_id + 1 from t2 where t ---- logical_plan 01)Projection: t1.t1_id, t1.t1_name, t1.t1_int -02)--Filter: t1.t1_id = Int32(11) OR __correlated_sq_1.__exists IS NULL -03)----Projection: t1.t1_id, t1.t1_name, t1.t1_int, __correlated_sq_1.__exists -04)------Left Join: CAST(t1.t1_id AS Int64) + Int64(12) = __correlated_sq_1.t2.t2_id + Int64(1) Filter: t1.t1_int > Int32(0) -05)--------TableScan: t1 projection=[t1_id, t1_name, t1_int] -06)--------SubqueryAlias: __correlated_sq_1 -07)----------Projection: CAST(t2.t2_id AS Int64) + Int64(1), Boolean(true) AS __exists -08)------------TableScan: t2 projection=[t2_id] +02)--Filter: t1.t1_id = Int32(11) OR NOT __correlated_sq_1.mark +03)----LeftMark Join: CAST(t1.t1_id AS Int64) + Int64(12) = __correlated_sq_1.t2.t2_id + Int64(1) Filter: t1.t1_int > Int32(0) +04)------TableScan: t1 projection=[t1_id, 
t1_name, t1_int] +05)------SubqueryAlias: __correlated_sq_1 +06)--------Projection: CAST(t2.t2_id AS Int64) + Int64(1) +07)----------TableScan: t2 projection=[t2_id] query ITI rowsort select t1.t1_id, @@ -1113,13 +1110,11 @@ where t1.t1_id > 40 or exists (select * from t2 where t1.t1_id = t2.t2_id) ---- logical_plan 01)Projection: t1.t1_id, t1.t1_name, t1.t1_int -02)--Filter: t1.t1_id > Int32(40) OR __correlated_sq_1.__exists IS NOT NULL -03)----Projection: t1.t1_id, t1.t1_name, t1.t1_int, __correlated_sq_1.__exists -04)------Left Join: t1.t1_id = __correlated_sq_1.t2_id -05)--------TableScan: t1 projection=[t1_id, t1_name, t1_int] -06)--------SubqueryAlias: __correlated_sq_1 -07)----------Projection: t2.t2_id, Boolean(true) AS __exists -08)------------TableScan: t2 projection=[t2_id] +02)--Filter: t1.t1_id > Int32(40) OR __correlated_sq_1.mark +03)----LeftMark Join: t1.t1_id = __correlated_sq_1.t2_id +04)------TableScan: t1 projection=[t1_id, t1_name, t1_int] +05)------SubqueryAlias: __correlated_sq_1 +06)--------TableScan: t2 projection=[t2_id] query ITI rowsort select t1.t1_id, @@ -1132,6 +1127,9 @@ where t1.t1_id > 40 or exists (select * from t2 where t1.t1_id = t2.t2_id) 22 b 2 44 d 4 +statement ok +set datafusion.explain.logical_plan_only = false; + # not_exists_subquery_to_join_with_correlated_outer_filter_disjunction query TT explain select t1.t1_id, @@ -1142,13 +1140,27 @@ where t1.t1_id > 40 or not exists (select * from t2 where t1.t1_id = t2.t2_id) ---- logical_plan 01)Projection: t1.t1_id, t1.t1_name, t1.t1_int -02)--Filter: t1.t1_id > Int32(40) OR __correlated_sq_1.__exists IS NULL -03)----Projection: t1.t1_id, t1.t1_name, t1.t1_int, __correlated_sq_1.__exists -04)------Left Join: t1.t1_id = __correlated_sq_1.t2_id -05)--------TableScan: t1 projection=[t1_id, t1_name, t1_int] -06)--------SubqueryAlias: __correlated_sq_1 -07)----------Projection: t2.t2_id, Boolean(true) AS __exists -08)------------TableScan: t2 projection=[t2_id] +02)--Filter: t1.t1_id 
> Int32(40) OR NOT __correlated_sq_1.mark +03)----LeftMark Join: t1.t1_id = __correlated_sq_1.t2_id +04)------TableScan: t1 projection=[t1_id, t1_name, t1_int] +05)------SubqueryAlias: __correlated_sq_1 +06)--------TableScan: t2 projection=[t2_id] +physical_plan +01)CoalesceBatchesExec: target_batch_size=2 +02)--FilterExec: t1_id@0 > 40 OR NOT mark@3, projection=[t1_id@0, t1_name@1, t1_int@2] +03)----CoalesceBatchesExec: target_batch_size=2 +04)------HashJoinExec: mode=Partitioned, join_type=LeftMark, on=[(t1_id@0, t2_id@0)] +05)--------CoalesceBatchesExec: target_batch_size=2 +06)----------RepartitionExec: partitioning=Hash([t1_id@0], 4), input_partitions=4 +07)------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 +08)--------------MemoryExec: partitions=1, partition_sizes=[1] +09)--------CoalesceBatchesExec: target_batch_size=2 +10)----------RepartitionExec: partitioning=Hash([t2_id@0], 4), input_partitions=4 +11)------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 +12)--------------MemoryExec: partitions=1, partition_sizes=[1] + +statement ok +set datafusion.explain.logical_plan_only = true; query ITI rowsort select t1.t1_id, @@ -1170,16 +1182,14 @@ where t1.t1_id in (select t3.t3_id from t3) and (t1.t1_id > 40 or t1.t1_id in (s ---- logical_plan 01)Projection: t1.t1_id, t1.t1_name, t1.t1_int -02)--Filter: t1.t1_id > Int32(40) OR __correlated_sq_2.__exists IS NOT NULL -03)----Projection: t1.t1_id, t1.t1_name, t1.t1_int, __correlated_sq_2.__exists -04)------Left Join: t1.t1_id = __correlated_sq_2.t2_id Filter: t1.t1_int > Int32(0) -05)--------LeftSemi Join: t1.t1_id = __correlated_sq_1.t3_id -06)----------TableScan: t1 projection=[t1_id, t1_name, t1_int] -07)----------SubqueryAlias: __correlated_sq_1 -08)------------TableScan: t3 projection=[t3_id] -09)--------SubqueryAlias: __correlated_sq_2 -10)----------Projection: t2.t2_id, Boolean(true) AS __exists -11)------------TableScan: t2 projection=[t2_id] 
+02)--Filter: t1.t1_id > Int32(40) OR __correlated_sq_2.mark +03)----LeftMark Join: t1.t1_id = __correlated_sq_2.t2_id Filter: t1.t1_int > Int32(0) +04)------LeftSemi Join: t1.t1_id = __correlated_sq_1.t3_id +05)--------TableScan: t1 projection=[t1_id, t1_name, t1_int] +06)--------SubqueryAlias: __correlated_sq_1 +07)----------TableScan: t3 projection=[t3_id] +08)------SubqueryAlias: __correlated_sq_2 +09)--------TableScan: t2 projection=[t2_id] query ITI rowsort select t1.t1_id, @@ -1192,6 +1202,18 @@ where t1.t1_id in (select t3.t3_id from t3) and (t1.t1_id > 40 or t1.t1_id in (s 22 b 2 44 d 4 +# Handle duplicate values in exists query +query ITI rowsort +select t1.t1_id, + t1.t1_name, + t1.t1_int +from t1 +where t1.t1_id > 40 or exists (select * from t2 cross join t3 where t1.t1_id = t2.t2_id) +---- +11 a 1 +22 b 2 +44 d 4 + # Nested subqueries query ITI rowsort select t1.t1_id, diff --git a/datafusion/sqllogictest/test_files/subquery_sort.slt b/datafusion/sqllogictest/test_files/subquery_sort.slt index e4360a9269ca..a3717dd838d6 100644 --- a/datafusion/sqllogictest/test_files/subquery_sort.slt +++ b/datafusion/sqllogictest/test_files/subquery_sort.slt @@ -65,8 +65,8 @@ logical_plan 05)--------TableScan: sink_table projection=[c1, c2, c3, c9] physical_plan 01)ProjectionExec: expr=[c1@0 as c1, c2@1 as c2] -02)--SortExec: expr=[c1@0 ASC NULLS LAST,c3@2 ASC NULLS LAST,c9@3 ASC NULLS LAST], preserve_partitioning=[false] -03)----SortExec: TopK(fetch=2), expr=[c1@0 DESC,c3@2 ASC NULLS LAST], preserve_partitioning=[false] +02)--SortExec: expr=[c1@0 ASC NULLS LAST, c3@2 ASC NULLS LAST, c9@3 ASC NULLS LAST], preserve_partitioning=[false] +03)----SortExec: TopK(fetch=2), expr=[c1@0 DESC, c3@2 ASC NULLS LAST], preserve_partitioning=[false] 04)------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c1, c2, c3, c9], has_header=true @@ -98,7 +98,7 @@ logical_plan 07)------------TableScan: sink_table projection=[c1, c3, c9] 
physical_plan 01)ProjectionExec: expr=[c1@0 as c1, r@1 as r] -02)--SortExec: TopK(fetch=2), expr=[c1@0 ASC NULLS LAST,c3@2 ASC NULLS LAST,c9@3 ASC NULLS LAST], preserve_partitioning=[false] +02)--SortExec: TopK(fetch=2), expr=[c1@0 ASC NULLS LAST, c3@2 ASC NULLS LAST, c9@3 ASC NULLS LAST], preserve_partitioning=[false] 03)----ProjectionExec: expr=[c1@0 as c1, rank() ORDER BY [sink_table.c1 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@3 as r, c3@1 as c3, c9@2 as c9] 04)------BoundedWindowAggExec: wdw=[rank() ORDER BY [sink_table.c1 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "rank() ORDER BY [sink_table.c1 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Utf8(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted] 05)--------SortExec: expr=[c1@0 DESC], preserve_partitioning=[false] @@ -118,8 +118,8 @@ logical_plan 07)------------TableScan: sink_table projection=[c1, c2, c3, c9] physical_plan 01)ProjectionExec: expr=[c1@0 as c1, c2@1 as c2] -02)--SortPreservingMergeExec: [c1@0 ASC NULLS LAST,c3@2 DESC,c9@3 ASC NULLS LAST] -03)----SortExec: expr=[c1@0 ASC NULLS LAST,c3@2 DESC,c9@3 ASC NULLS LAST], preserve_partitioning=[true] +02)--SortPreservingMergeExec: [c1@0 ASC NULLS LAST, c3@2 DESC, c9@3 ASC NULLS LAST] +03)----SortExec: expr=[c1@0 ASC NULLS LAST, c3@2 DESC, c9@3 ASC NULLS LAST], preserve_partitioning=[true] 04)------ProjectionExec: expr=[first_value(sink_table.c1) ORDER BY [sink_table.c1 ASC NULLS LAST, sink_table.c3 DESC NULLS FIRST, sink_table.c9 ASC NULLS LAST]@1 as c1, first_value(sink_table.c2) ORDER BY [sink_table.c1 ASC NULLS LAST, sink_table.c3 DESC NULLS FIRST, sink_table.c9 ASC NULLS LAST]@2 as c2, first_value(sink_table.c3) ORDER BY [sink_table.c1 ASC NULLS LAST, sink_table.c3 DESC NULLS FIRST, sink_table.c9 ASC 
NULLS LAST]@3 as c3, first_value(sink_table.c9) ORDER BY [sink_table.c1 ASC NULLS LAST, sink_table.c3 DESC NULLS FIRST, sink_table.c9 ASC NULLS LAST]@4 as c9] 05)--------AggregateExec: mode=FinalPartitioned, gby=[c1@0 as c1], aggr=[first_value(sink_table.c1) ORDER BY [sink_table.c1 ASC NULLS LAST, sink_table.c3 DESC NULLS FIRST, sink_table.c9 ASC NULLS LAST], first_value(sink_table.c2) ORDER BY [sink_table.c1 ASC NULLS LAST, sink_table.c3 DESC NULLS FIRST, sink_table.c9 ASC NULLS LAST], first_value(sink_table.c3) ORDER BY [sink_table.c1 ASC NULLS LAST, sink_table.c3 DESC NULLS FIRST, sink_table.c9 ASC NULLS LAST], first_value(sink_table.c9) ORDER BY [sink_table.c1 ASC NULLS LAST, sink_table.c3 DESC NULLS FIRST, sink_table.c9 ASC NULLS LAST]] 06)----------CoalesceBatchesExec: target_batch_size=8192 diff --git a/datafusion/sqllogictest/test_files/timestamps.slt b/datafusion/sqllogictest/test_files/timestamps.slt index 38c2a6647273..a09a63a791fc 100644 --- a/datafusion/sqllogictest/test_files/timestamps.slt +++ b/datafusion/sqllogictest/test_files/timestamps.slt @@ -980,6 +980,23 @@ SELECT DATE_BIN('3 years 1 months', '2022-09-01 00:00:00Z'); ---- 2022-06-01T00:00:00 +# Times before the unix epoch +query P +select date_bin('1 hour', column1) +from (values + (timestamp '1969-01-01 00:00:00'), + (timestamp '1969-01-01 00:15:00'), + (timestamp '1969-01-01 00:30:00'), + (timestamp '1969-01-01 00:45:00'), + (timestamp '1969-01-01 01:00:00') +) as sq +---- +1969-01-01T00:00:00 +1969-01-01T00:00:00 +1969-01-01T00:00:00 +1969-01-01T00:00:00 +1969-01-01T01:00:00 + ### ## test date_trunc function ### diff --git a/datafusion/sqllogictest/test_files/tpch/q1.slt.part b/datafusion/sqllogictest/test_files/tpch/q1.slt.part index 8cfd25d26c07..4d4323e93e9e 100644 --- a/datafusion/sqllogictest/test_files/tpch/q1.slt.part +++ b/datafusion/sqllogictest/test_files/tpch/q1.slt.part @@ -47,8 +47,8 @@ logical_plan 05)--------Filter: lineitem.l_shipdate <= Date32("1998-09-02") 
06)----------TableScan: lineitem projection=[l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate], partial_filters=[lineitem.l_shipdate <= Date32("1998-09-02")] physical_plan -01)SortPreservingMergeExec: [l_returnflag@0 ASC NULLS LAST,l_linestatus@1 ASC NULLS LAST] -02)--SortExec: expr=[l_returnflag@0 ASC NULLS LAST,l_linestatus@1 ASC NULLS LAST], preserve_partitioning=[true] +01)SortPreservingMergeExec: [l_returnflag@0 ASC NULLS LAST, l_linestatus@1 ASC NULLS LAST] +02)--SortExec: expr=[l_returnflag@0 ASC NULLS LAST, l_linestatus@1 ASC NULLS LAST], preserve_partitioning=[true] 03)----ProjectionExec: expr=[l_returnflag@0 as l_returnflag, l_linestatus@1 as l_linestatus, sum(lineitem.l_quantity)@2 as sum_qty, sum(lineitem.l_extendedprice)@3 as sum_base_price, sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@4 as sum_disc_price, sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount * Int64(1) + lineitem.l_tax)@5 as sum_charge, avg(lineitem.l_quantity)@6 as avg_qty, avg(lineitem.l_extendedprice)@7 as avg_price, avg(lineitem.l_discount)@8 as avg_disc, count(*)@9 as count_order] 04)------AggregateExec: mode=FinalPartitioned, gby=[l_returnflag@0 as l_returnflag, l_linestatus@1 as l_linestatus], aggr=[sum(lineitem.l_quantity), sum(lineitem.l_extendedprice), sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount), sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount * Int64(1) + lineitem.l_tax), avg(lineitem.l_quantity), avg(lineitem.l_extendedprice), avg(lineitem.l_discount), count(*)] 05)--------CoalesceBatchesExec: target_batch_size=8192 diff --git a/datafusion/sqllogictest/test_files/tpch/q13.slt.part b/datafusion/sqllogictest/test_files/tpch/q13.slt.part index bb32fb209700..2a9fb12a31c2 100644 --- a/datafusion/sqllogictest/test_files/tpch/q13.slt.part +++ b/datafusion/sqllogictest/test_files/tpch/q13.slt.part @@ -53,8 +53,8 @@ logical_plan 11)------------------Filter: orders.o_comment NOT LIKE 
Utf8("%special%requests%") 12)--------------------TableScan: orders projection=[o_orderkey, o_custkey, o_comment], partial_filters=[orders.o_comment NOT LIKE Utf8("%special%requests%")] physical_plan -01)SortPreservingMergeExec: [custdist@1 DESC,c_count@0 DESC], fetch=10 -02)--SortExec: TopK(fetch=10), expr=[custdist@1 DESC,c_count@0 DESC], preserve_partitioning=[true] +01)SortPreservingMergeExec: [custdist@1 DESC, c_count@0 DESC], fetch=10 +02)--SortExec: TopK(fetch=10), expr=[custdist@1 DESC, c_count@0 DESC], preserve_partitioning=[true] 03)----ProjectionExec: expr=[c_count@0 as c_count, count(*)@1 as custdist] 04)------AggregateExec: mode=FinalPartitioned, gby=[c_count@0 as c_count], aggr=[count(*)] 05)--------CoalesceBatchesExec: target_batch_size=8192 diff --git a/datafusion/sqllogictest/test_files/tpch/q16.slt.part b/datafusion/sqllogictest/test_files/tpch/q16.slt.part index 8058371764f2..6b2c2f7fdc3e 100644 --- a/datafusion/sqllogictest/test_files/tpch/q16.slt.part +++ b/datafusion/sqllogictest/test_files/tpch/q16.slt.part @@ -65,8 +65,8 @@ logical_plan 13)--------------Filter: supplier.s_comment LIKE Utf8("%Customer%Complaints%") 14)----------------TableScan: supplier projection=[s_suppkey, s_comment], partial_filters=[supplier.s_comment LIKE Utf8("%Customer%Complaints%")] physical_plan -01)SortPreservingMergeExec: [supplier_cnt@3 DESC,p_brand@0 ASC NULLS LAST,p_type@1 ASC NULLS LAST,p_size@2 ASC NULLS LAST], fetch=10 -02)--SortExec: TopK(fetch=10), expr=[supplier_cnt@3 DESC,p_brand@0 ASC NULLS LAST,p_type@1 ASC NULLS LAST,p_size@2 ASC NULLS LAST], preserve_partitioning=[true] +01)SortPreservingMergeExec: [supplier_cnt@3 DESC, p_brand@0 ASC NULLS LAST, p_type@1 ASC NULLS LAST, p_size@2 ASC NULLS LAST], fetch=10 +02)--SortExec: TopK(fetch=10), expr=[supplier_cnt@3 DESC, p_brand@0 ASC NULLS LAST, p_type@1 ASC NULLS LAST, p_size@2 ASC NULLS LAST], preserve_partitioning=[true] 03)----ProjectionExec: expr=[p_brand@0 as p_brand, p_type@1 as p_type, p_size@2 as 
p_size, count(alias1)@3 as supplier_cnt] 04)------AggregateExec: mode=FinalPartitioned, gby=[p_brand@0 as p_brand, p_type@1 as p_type, p_size@2 as p_size], aggr=[count(alias1)] 05)--------CoalesceBatchesExec: target_batch_size=8192 diff --git a/datafusion/sqllogictest/test_files/tpch/q18.slt.part b/datafusion/sqllogictest/test_files/tpch/q18.slt.part index e78b0d87f651..c80352c5d36a 100644 --- a/datafusion/sqllogictest/test_files/tpch/q18.slt.part +++ b/datafusion/sqllogictest/test_files/tpch/q18.slt.part @@ -67,8 +67,8 @@ logical_plan 14)------------Aggregate: groupBy=[[lineitem.l_orderkey]], aggr=[[sum(lineitem.l_quantity)]] 15)--------------TableScan: lineitem projection=[l_orderkey, l_quantity] physical_plan -01)SortPreservingMergeExec: [o_totalprice@4 DESC,o_orderdate@3 ASC NULLS LAST] -02)--SortExec: expr=[o_totalprice@4 DESC,o_orderdate@3 ASC NULLS LAST], preserve_partitioning=[true] +01)SortPreservingMergeExec: [o_totalprice@4 DESC, o_orderdate@3 ASC NULLS LAST] +02)--SortExec: expr=[o_totalprice@4 DESC, o_orderdate@3 ASC NULLS LAST], preserve_partitioning=[true] 03)----AggregateExec: mode=FinalPartitioned, gby=[c_name@0 as c_name, c_custkey@1 as c_custkey, o_orderkey@2 as o_orderkey, o_orderdate@3 as o_orderdate, o_totalprice@4 as o_totalprice], aggr=[sum(lineitem.l_quantity)] 04)------CoalesceBatchesExec: target_batch_size=8192 05)--------RepartitionExec: partitioning=Hash([c_name@0, c_custkey@1, o_orderkey@2, o_orderdate@3, o_totalprice@4], 4), input_partitions=4 diff --git a/datafusion/sqllogictest/test_files/tpch/q2.slt.part b/datafusion/sqllogictest/test_files/tpch/q2.slt.part index 17f3b78a089d..23ffa0d226b8 100644 --- a/datafusion/sqllogictest/test_files/tpch/q2.slt.part +++ b/datafusion/sqllogictest/test_files/tpch/q2.slt.part @@ -99,8 +99,8 @@ logical_plan 34)------------------Filter: region.r_name = Utf8("EUROPE") 35)--------------------TableScan: region projection=[r_regionkey, r_name], partial_filters=[region.r_name = Utf8("EUROPE")] 
physical_plan -01)SortPreservingMergeExec: [s_acctbal@0 DESC,n_name@2 ASC NULLS LAST,s_name@1 ASC NULLS LAST,p_partkey@3 ASC NULLS LAST], fetch=10 -02)--SortExec: TopK(fetch=10), expr=[s_acctbal@0 DESC,n_name@2 ASC NULLS LAST,s_name@1 ASC NULLS LAST,p_partkey@3 ASC NULLS LAST], preserve_partitioning=[true] +01)SortPreservingMergeExec: [s_acctbal@0 DESC, n_name@2 ASC NULLS LAST, s_name@1 ASC NULLS LAST, p_partkey@3 ASC NULLS LAST], fetch=10 +02)--SortExec: TopK(fetch=10), expr=[s_acctbal@0 DESC, n_name@2 ASC NULLS LAST, s_name@1 ASC NULLS LAST, p_partkey@3 ASC NULLS LAST], preserve_partitioning=[true] 03)----ProjectionExec: expr=[s_acctbal@5 as s_acctbal, s_name@2 as s_name, n_name@7 as n_name, p_partkey@0 as p_partkey, p_mfgr@1 as p_mfgr, s_address@3 as s_address, s_phone@4 as s_phone, s_comment@6 as s_comment] 04)------CoalesceBatchesExec: target_batch_size=8192 05)--------HashJoinExec: mode=Partitioned, join_type=Inner, on=[(p_partkey@0, ps_partkey@1), (ps_supplycost@7, min(partsupp.ps_supplycost)@0)], projection=[p_partkey@0, p_mfgr@1, s_name@2, s_address@3, s_phone@4, s_acctbal@5, s_comment@6, n_name@8] diff --git a/datafusion/sqllogictest/test_files/tpch/q21.slt.part b/datafusion/sqllogictest/test_files/tpch/q21.slt.part index 5cf069ec7248..93dcd4c68052 100644 --- a/datafusion/sqllogictest/test_files/tpch/q21.slt.part +++ b/datafusion/sqllogictest/test_files/tpch/q21.slt.part @@ -90,8 +90,8 @@ logical_plan 30)----------------Filter: lineitem.l_receiptdate > lineitem.l_commitdate 31)------------------TableScan: lineitem projection=[l_orderkey, l_suppkey, l_commitdate, l_receiptdate], partial_filters=[lineitem.l_receiptdate > lineitem.l_commitdate] physical_plan -01)SortPreservingMergeExec: [numwait@1 DESC,s_name@0 ASC NULLS LAST] -02)--SortExec: expr=[numwait@1 DESC,s_name@0 ASC NULLS LAST], preserve_partitioning=[true] +01)SortPreservingMergeExec: [numwait@1 DESC, s_name@0 ASC NULLS LAST] +02)--SortExec: expr=[numwait@1 DESC, s_name@0 ASC NULLS LAST], 
preserve_partitioning=[true] 03)----ProjectionExec: expr=[s_name@0 as s_name, count(*)@1 as numwait] 04)------AggregateExec: mode=FinalPartitioned, gby=[s_name@0 as s_name], aggr=[count(*)] 05)--------CoalesceBatchesExec: target_batch_size=8192 diff --git a/datafusion/sqllogictest/test_files/tpch/q3.slt.part b/datafusion/sqllogictest/test_files/tpch/q3.slt.part index 16a1c2b6ebb1..289e9c7732bb 100644 --- a/datafusion/sqllogictest/test_files/tpch/q3.slt.part +++ b/datafusion/sqllogictest/test_files/tpch/q3.slt.part @@ -58,8 +58,8 @@ logical_plan 14)------------Filter: lineitem.l_shipdate > Date32("1995-03-15") 15)--------------TableScan: lineitem projection=[l_orderkey, l_extendedprice, l_discount, l_shipdate], partial_filters=[lineitem.l_shipdate > Date32("1995-03-15")] physical_plan -01)SortPreservingMergeExec: [revenue@1 DESC,o_orderdate@2 ASC NULLS LAST], fetch=10 -02)--SortExec: TopK(fetch=10), expr=[revenue@1 DESC,o_orderdate@2 ASC NULLS LAST], preserve_partitioning=[true] +01)SortPreservingMergeExec: [revenue@1 DESC, o_orderdate@2 ASC NULLS LAST], fetch=10 +02)--SortExec: TopK(fetch=10), expr=[revenue@1 DESC, o_orderdate@2 ASC NULLS LAST], preserve_partitioning=[true] 03)----ProjectionExec: expr=[l_orderkey@0 as l_orderkey, sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@3 as revenue, o_orderdate@1 as o_orderdate, o_shippriority@2 as o_shippriority] 04)------AggregateExec: mode=FinalPartitioned, gby=[l_orderkey@0 as l_orderkey, o_orderdate@1 as o_orderdate, o_shippriority@2 as o_shippriority], aggr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] 05)--------CoalesceBatchesExec: target_batch_size=8192 diff --git a/datafusion/sqllogictest/test_files/tpch/q7.slt.part b/datafusion/sqllogictest/test_files/tpch/q7.slt.part index 5a6cef5311d4..a16af4710478 100644 --- a/datafusion/sqllogictest/test_files/tpch/q7.slt.part +++ b/datafusion/sqllogictest/test_files/tpch/q7.slt.part @@ -84,8 +84,8 @@ logical_plan 24)--------------Filter: 
nation.n_name = Utf8("GERMANY") OR nation.n_name = Utf8("FRANCE") 25)----------------TableScan: nation projection=[n_nationkey, n_name], partial_filters=[nation.n_name = Utf8("GERMANY") OR nation.n_name = Utf8("FRANCE")] physical_plan -01)SortPreservingMergeExec: [supp_nation@0 ASC NULLS LAST,cust_nation@1 ASC NULLS LAST,l_year@2 ASC NULLS LAST] -02)--SortExec: expr=[supp_nation@0 ASC NULLS LAST,cust_nation@1 ASC NULLS LAST,l_year@2 ASC NULLS LAST], preserve_partitioning=[true] +01)SortPreservingMergeExec: [supp_nation@0 ASC NULLS LAST, cust_nation@1 ASC NULLS LAST, l_year@2 ASC NULLS LAST] +02)--SortExec: expr=[supp_nation@0 ASC NULLS LAST, cust_nation@1 ASC NULLS LAST, l_year@2 ASC NULLS LAST], preserve_partitioning=[true] 03)----ProjectionExec: expr=[supp_nation@0 as supp_nation, cust_nation@1 as cust_nation, l_year@2 as l_year, sum(shipping.volume)@3 as revenue] 04)------AggregateExec: mode=FinalPartitioned, gby=[supp_nation@0 as supp_nation, cust_nation@1 as cust_nation, l_year@2 as l_year], aggr=[sum(shipping.volume)] 05)--------CoalesceBatchesExec: target_batch_size=8192 diff --git a/datafusion/sqllogictest/test_files/tpch/q9.slt.part b/datafusion/sqllogictest/test_files/tpch/q9.slt.part index b3631f07cc8f..c4910beb842b 100644 --- a/datafusion/sqllogictest/test_files/tpch/q9.slt.part +++ b/datafusion/sqllogictest/test_files/tpch/q9.slt.part @@ -75,8 +75,8 @@ logical_plan 21)----------------TableScan: orders projection=[o_orderkey, o_orderdate] 22)------------TableScan: nation projection=[n_nationkey, n_name] physical_plan -01)SortPreservingMergeExec: [nation@0 ASC NULLS LAST,o_year@1 DESC], fetch=10 -02)--SortExec: TopK(fetch=10), expr=[nation@0 ASC NULLS LAST,o_year@1 DESC], preserve_partitioning=[true] +01)SortPreservingMergeExec: [nation@0 ASC NULLS LAST, o_year@1 DESC], fetch=10 +02)--SortExec: TopK(fetch=10), expr=[nation@0 ASC NULLS LAST, o_year@1 DESC], preserve_partitioning=[true] 03)----ProjectionExec: expr=[nation@0 as nation, o_year@1 as o_year, 
sum(profit.amount)@2 as sum_profit] 04)------AggregateExec: mode=FinalPartitioned, gby=[nation@0 as nation, o_year@1 as o_year], aggr=[sum(profit.amount)] 05)--------CoalesceBatchesExec: target_batch_size=8192 diff --git a/datafusion/sqllogictest/test_files/window.slt b/datafusion/sqllogictest/test_files/window.slt index 4a2d9e1d6864..d593a985c458 100644 --- a/datafusion/sqllogictest/test_files/window.slt +++ b/datafusion/sqllogictest/test_files/window.slt @@ -361,7 +361,7 @@ physical_plan 03)----AggregateExec: mode=SinglePartitioned, gby=[b@2 as b], aggr=[max(d.a), max(d.seq)], ordering_mode=Sorted 04)------ProjectionExec: expr=[row_number() PARTITION BY [s.b] ORDER BY [s.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@2 as seq, a@0 as a, b@1 as b] 05)--------BoundedWindowAggExec: wdw=[row_number() PARTITION BY [s.b] ORDER BY [s.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "row_number() PARTITION BY [s.b] ORDER BY [s.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int64(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted] -06)----------SortExec: expr=[b@1 ASC NULLS LAST,a@0 ASC NULLS LAST], preserve_partitioning=[true] +06)----------SortExec: expr=[b@1 ASC NULLS LAST, a@0 ASC NULLS LAST], preserve_partitioning=[true] 07)------------CoalesceBatchesExec: target_batch_size=8192 08)--------------RepartitionExec: partitioning=Hash([b@1], 4), input_partitions=4 09)----------------UnionExec @@ -1244,7 +1244,7 @@ physical_plan 02)--BoundedWindowAggExec: wdw=[sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", 
data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted] 03)----ProjectionExec: expr=[c9@1 as c9, sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c8 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@2 as sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c8 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW] 04)------BoundedWindowAggExec: wdw=[sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c8 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c8 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted] -05)--------SortExec: expr=[c9@1 ASC NULLS LAST,c8@0 ASC NULLS LAST], preserve_partitioning=[false] +05)--------SortExec: expr=[c9@1 ASC NULLS LAST, c8@0 ASC NULLS LAST], preserve_partitioning=[false] 06)----------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c8, c9], has_header=true @@ -1265,7 +1265,7 @@ physical_plan 02)--WindowAggExec: wdw=[sum(aggregate_test_100.c9) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "sum(aggregate_test_100.c9) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false 
}] 03)----BoundedWindowAggExec: wdw=[max(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "max(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int8(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted] 04)------BoundedWindowAggExec: wdw=[min(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c2 ASC NULLS LAST, aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "min(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c2 ASC NULLS LAST, aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int8(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted] -05)--------SortExec: expr=[c2@0 ASC NULLS LAST,c9@1 ASC NULLS LAST], preserve_partitioning=[false] +05)--------SortExec: expr=[c2@0 ASC NULLS LAST, c9@1 ASC NULLS LAST], preserve_partitioning=[false] 06)----------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c2, c9], has_header=true @@ -1288,9 +1288,9 @@ physical_plan 02)--ProjectionExec: expr=[c2@0 as c2, max(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@3 as max(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW, sum(aggregate_test_100.c9) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING@4 as sum(aggregate_test_100.c9) ROWS 
BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING, min(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c2 ASC NULLS LAST, aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@2 as min(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c2 ASC NULLS LAST, aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW] 03)----WindowAggExec: wdw=[sum(aggregate_test_100.c9) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "sum(aggregate_test_100.c9) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] 04)------BoundedWindowAggExec: wdw=[max(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "max(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted] -05)--------SortExec: expr=[c9@1 ASC NULLS LAST,c2@0 ASC NULLS LAST], preserve_partitioning=[false] +05)--------SortExec: expr=[c9@1 ASC NULLS LAST, c2@0 ASC NULLS LAST], preserve_partitioning=[false] 06)----------BoundedWindowAggExec: wdw=[min(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c2 ASC NULLS LAST, aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "min(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c2 ASC NULLS LAST, aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 
CURRENT ROW", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int8(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted] -07)------------SortExec: expr=[c2@0 ASC NULLS LAST,c9@1 ASC NULLS LAST], preserve_partitioning=[false] +07)------------SortExec: expr=[c2@0 ASC NULLS LAST, c9@1 ASC NULLS LAST], preserve_partitioning=[false] 08)--------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c2, c9], has_header=true # test_window_partition_by_order_by @@ -1312,12 +1312,12 @@ logical_plan physical_plan 01)ProjectionExec: expr=[sum(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1, aggregate_test_100.c2] ORDER BY [aggregate_test_100.c2 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING@2 as sum(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1, aggregate_test_100.c2] ORDER BY [aggregate_test_100.c2 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING, count(*) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c2 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING@3 as count(*) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c2 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING] 02)--BoundedWindowAggExec: wdw=[count(*) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c2 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "count(*) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c2 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(1)), is_causal: false }], mode=[Sorted] -03)----SortExec: expr=[c1@0 ASC NULLS LAST,c2@1 ASC NULLS LAST], preserve_partitioning=[true] +03)----SortExec: 
expr=[c1@0 ASC NULLS LAST, c2@1 ASC NULLS LAST], preserve_partitioning=[true] 04)------CoalesceBatchesExec: target_batch_size=4096 05)--------RepartitionExec: partitioning=Hash([c1@0], 2), input_partitions=2 06)----------ProjectionExec: expr=[c1@0 as c1, c2@1 as c2, sum(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1, aggregate_test_100.c2] ORDER BY [aggregate_test_100.c2 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING@3 as sum(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1, aggregate_test_100.c2] ORDER BY [aggregate_test_100.c2 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING] 07)------------BoundedWindowAggExec: wdw=[sum(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1, aggregate_test_100.c2] ORDER BY [aggregate_test_100.c2 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "sum(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1, aggregate_test_100.c2] ORDER BY [aggregate_test_100.c2 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(1)), is_causal: false }], mode=[Sorted] -08)--------------SortExec: expr=[c1@0 ASC NULLS LAST,c2@1 ASC NULLS LAST], preserve_partitioning=[true] +08)--------------SortExec: expr=[c1@0 ASC NULLS LAST, c2@1 ASC NULLS LAST], preserve_partitioning=[true] 09)----------------CoalesceBatchesExec: target_batch_size=4096 10)------------------RepartitionExec: partitioning=Hash([c1@0, c2@1], 2), input_partitions=2 11)--------------------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1 @@ -1470,10 +1470,10 @@ physical_plan 01)ProjectionExec: expr=[c9@2 as c9, sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c1 ASC NULLS LAST, aggregate_test_100.c2 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING@5 as 
sum1, sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST, aggregate_test_100.c1 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING@3 as sum2, row_number() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING@4 as rn2] 02)--GlobalLimitExec: skip=0, fetch=5 03)----BoundedWindowAggExec: wdw=[sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c1 ASC NULLS LAST, aggregate_test_100.c2 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING: Ok(Field { name: "sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c1 ASC NULLS LAST, aggregate_test_100.c2 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(5)), is_causal: false }], mode=[Sorted] -04)------SortExec: expr=[c9@2 ASC NULLS LAST,c1@0 ASC NULLS LAST,c2@1 ASC NULLS LAST], preserve_partitioning=[false] +04)------SortExec: expr=[c9@2 ASC NULLS LAST, c1@0 ASC NULLS LAST, c2@1 ASC NULLS LAST], preserve_partitioning=[false] 05)--------BoundedWindowAggExec: wdw=[row_number() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING: Ok(Field { name: "row_number() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(5)), is_causal: false }], mode=[Sorted] 06)----------BoundedWindowAggExec: wdw=[sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST, aggregate_test_100.c1 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING: Ok(Field { name: "sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 
DESC NULLS FIRST, aggregate_test_100.c1 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(5)), is_causal: false }], mode=[Sorted] -07)------------SortExec: expr=[c9@2 DESC,c1@0 DESC], preserve_partitioning=[false] +07)------------SortExec: expr=[c9@2 DESC, c1@0 DESC], preserve_partitioning=[false] 08)--------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c1, c2, c9], has_header=true query IIII @@ -1554,17 +1554,17 @@ physical_plan 03)----WindowAggExec: wdw=[sum(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 11 FOLLOWING: Ok(Field { name: "sum(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 11 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int64(10)), end_bound: Following(Int64(11)), is_causal: false }, sum(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "sum(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int64(NULL)), end_bound: CurrentRow, is_causal: false }, sum(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 11 FOLLOWING: Ok(Field { name: "sum(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 11 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int64(NULL)), 
end_bound: Following(Int64(11)), is_causal: false }, sum(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS LAST] RANGE BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING: Ok(Field { name: "sum(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS LAST] RANGE BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: CurrentRow, end_bound: Following(Int64(NULL)), is_causal: false }] 04)------ProjectionExec: expr=[c1@0 as c1, c3@2 as c3, sum(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS FIRST, null_cases.c1 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@3 as sum(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS FIRST, null_cases.c1 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW, sum(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 11 FOLLOWING@4 as sum(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 11 FOLLOWING, sum(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@5 as sum(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW, sum(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND 11 FOLLOWING@6 as sum(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND 11 FOLLOWING, sum(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS FIRST] RANGE BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING@7 as sum(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS FIRST] RANGE BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING, sum(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 11 FOLLOWING@8 as sum(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 11 FOLLOWING, sum(null_cases.c1) ORDER BY [null_cases.c3 
DESC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@9 as sum(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW, sum(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 11 FOLLOWING@10 as sum(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 11 FOLLOWING, sum(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS LAST] RANGE BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING@11 as sum(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS LAST] RANGE BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING, sum(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 11 FOLLOWING@12 as sum(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 11 FOLLOWING, sum(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@13 as sum(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW, sum(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND 11 FOLLOWING@14 as sum(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND 11 FOLLOWING, sum(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS FIRST] RANGE BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING@15 as sum(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS FIRST] RANGE BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING, sum(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS LAST, null_cases.c1 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@16 as sum(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS LAST, null_cases.c1 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW, sum(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS LAST, null_cases.c1 ASC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@17 as sum(null_cases.c1) ORDER BY 
[null_cases.c3 ASC NULLS LAST, null_cases.c1 ASC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW, sum(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS LAST, null_cases.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@18 as sum(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS LAST, null_cases.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW] 05)--------BoundedWindowAggExec: wdw=[sum(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS LAST, null_cases.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "sum(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS LAST, null_cases.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int64(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted] -06)----------SortExec: expr=[c3@2 ASC NULLS LAST,c2@1 ASC NULLS LAST], preserve_partitioning=[false] +06)----------SortExec: expr=[c3@2 ASC NULLS LAST, c2@1 ASC NULLS LAST], preserve_partitioning=[false] 07)------------BoundedWindowAggExec: wdw=[sum(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS LAST, null_cases.c1 ASC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "sum(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS LAST, null_cases.c1 ASC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int64(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted] -08)--------------SortExec: expr=[c3@2 ASC NULLS LAST,c1@0 ASC], preserve_partitioning=[false] +08)--------------SortExec: expr=[c3@2 ASC NULLS LAST, c1@0 ASC], preserve_partitioning=[false] 09)----------------BoundedWindowAggExec: wdw=[sum(null_cases.c1) ORDER BY [null_cases.c3 
ASC NULLS LAST, null_cases.c1 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "sum(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS LAST, null_cases.c1 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int64(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted] -10)------------------SortExec: expr=[c3@2 ASC NULLS LAST,c1@0 DESC], preserve_partitioning=[false] +10)------------------SortExec: expr=[c3@2 ASC NULLS LAST, c1@0 DESC], preserve_partitioning=[false] 11)--------------------WindowAggExec: wdw=[sum(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 11 FOLLOWING: Ok(Field { name: "sum(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 11 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int64(11)), end_bound: Following(Int64(10)), is_causal: false }, sum(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "sum(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: CurrentRow, end_bound: Following(Int64(NULL)), is_causal: false }, sum(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND 11 FOLLOWING: Ok(Field { name: "sum(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND 11 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: 
Preceding(Int64(11)), end_bound: Following(Int64(NULL)), is_causal: false }, sum(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS FIRST] RANGE BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING: Ok(Field { name: "sum(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS FIRST] RANGE BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int64(NULL)), end_bound: CurrentRow, is_causal: false }] 12)----------------------WindowAggExec: wdw=[sum(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 11 FOLLOWING: Ok(Field { name: "sum(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 11 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int64(10)), end_bound: Following(Int64(11)), is_causal: false }, sum(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "sum(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int64(NULL)), end_bound: CurrentRow, is_causal: false }, sum(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 11 FOLLOWING: Ok(Field { name: "sum(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 11 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int64(NULL)), end_bound: Following(Int64(11)), is_causal: false }, sum(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS LAST] RANGE BETWEEN CURRENT ROW AND 
UNBOUNDED FOLLOWING: Ok(Field { name: "sum(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS LAST] RANGE BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: CurrentRow, end_bound: Following(Int64(NULL)), is_causal: false }] 13)------------------------SortExec: expr=[c3@2 DESC NULLS LAST], preserve_partitioning=[false] 14)--------------------------WindowAggExec: wdw=[sum(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 11 FOLLOWING: Ok(Field { name: "sum(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 11 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int64(10)), end_bound: Following(Int64(11)), is_causal: false }, sum(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "sum(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int64(NULL)), end_bound: CurrentRow, is_causal: false }, sum(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND 11 FOLLOWING: Ok(Field { name: "sum(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND 11 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int64(NULL)), end_bound: Following(Int64(11)), is_causal: false }, sum(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS FIRST] RANGE BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING: Ok(Field { name: "sum(null_cases.c1) ORDER BY 
[null_cases.c3 DESC NULLS FIRST] RANGE BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: CurrentRow, end_bound: Following(Int64(NULL)), is_causal: false }] 15)----------------------------BoundedWindowAggExec: wdw=[sum(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS FIRST, null_cases.c1 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "sum(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS FIRST, null_cases.c1 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int64(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted] -16)------------------------------SortExec: expr=[c3@2 DESC,c1@0 ASC NULLS LAST], preserve_partitioning=[false] +16)------------------------------SortExec: expr=[c3@2 DESC, c1@0 ASC NULLS LAST], preserve_partitioning=[false] 17)--------------------------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/null_cases.csv]]}, projection=[c1, c2, c3], has_header=true query IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII @@ -1639,7 +1639,7 @@ physical_plan 02)--GlobalLimitExec: skip=0, fetch=5 03)----BoundedWindowAggExec: wdw=[sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING: Ok(Field { name: "sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(5)), is_causal: false }], mode=[Sorted] 04)------BoundedWindowAggExec: 
wdw=[sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c1 ASC NULLS LAST, aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING: Ok(Field { name: "sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c1 ASC NULLS LAST, aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(5)), is_causal: false }], mode=[Sorted] -05)--------SortExec: expr=[c1@0 ASC NULLS LAST,c9@1 DESC], preserve_partitioning=[false] +05)--------SortExec: expr=[c1@0 ASC NULLS LAST, c9@1 DESC], preserve_partitioning=[false] 06)----------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c1, c9], has_header=true @@ -1683,7 +1683,7 @@ physical_plan 02)--GlobalLimitExec: skip=0, fetch=5 03)----BoundedWindowAggExec: wdw=[sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING: Ok(Field { name: "sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(5)), end_bound: Following(UInt64(1)), is_causal: false }], mode=[Sorted] 04)------BoundedWindowAggExec: wdw=[sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING: Ok(Field { name: "sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} 
}), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(5)), is_causal: false }], mode=[Sorted] -05)--------SortExec: expr=[c1@0 ASC NULLS LAST,c9@1 DESC], preserve_partitioning=[false] +05)--------SortExec: expr=[c1@0 ASC NULLS LAST, c9@1 DESC], preserve_partitioning=[false] 06)----------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c1, c9], has_header=true query III @@ -1730,8 +1730,8 @@ physical_plan 03)----WindowAggExec: wdw=[sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c3 + aggregate_test_100.c4 ASC NULLS LAST, aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c3 + aggregate_test_100.c4 ASC NULLS LAST, aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: CurrentRow, end_bound: Following(Int16(NULL)), is_causal: false }] 04)------ProjectionExec: expr=[__common_expr_1@0 as __common_expr_1, c3@2 as c3, c9@3 as c9, sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c3 + aggregate_test_100.c4 DESC NULLS FIRST, aggregate_test_100.c9 DESC NULLS FIRST, aggregate_test_100.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@4 as sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c3 + aggregate_test_100.c4 DESC NULLS FIRST, aggregate_test_100.c9 DESC NULLS FIRST, aggregate_test_100.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW] 05)--------BoundedWindowAggExec: wdw=[sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c3 + aggregate_test_100.c4 DESC NULLS FIRST, aggregate_test_100.c9 DESC NULLS FIRST, aggregate_test_100.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: 
"sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c3 + aggregate_test_100.c4 DESC NULLS FIRST, aggregate_test_100.c9 DESC NULLS FIRST, aggregate_test_100.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int16(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted] -06)----------SortPreservingMergeExec: [__common_expr_1@0 DESC,c9@3 DESC,c2@1 ASC NULLS LAST] -07)------------SortExec: expr=[__common_expr_1@0 DESC,c9@3 DESC,c2@1 ASC NULLS LAST], preserve_partitioning=[true] +06)----------SortPreservingMergeExec: [__common_expr_1@0 DESC, c9@3 DESC, c2@1 ASC NULLS LAST] +07)------------SortExec: expr=[__common_expr_1@0 DESC, c9@3 DESC, c2@1 ASC NULLS LAST], preserve_partitioning=[true] 08)--------------ProjectionExec: expr=[c3@1 + c4@2 as __common_expr_1, c2@0 as c2, c3@1 as c3, c9@3 as c9] 09)----------------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1 10)------------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c2, c3, c4, c9], has_header=true @@ -1823,13 +1823,13 @@ physical_plan 01)SortPreservingMergeExec: [c3@0 ASC NULLS LAST], fetch=5 02)--ProjectionExec: expr=[c3@0 as c3, sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c3 DESC NULLS FIRST, aggregate_test_100.c9 DESC NULLS FIRST, aggregate_test_100.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@2 as sum1, sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c3] ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@3 as sum2] 03)----BoundedWindowAggExec: wdw=[sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c3] ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "sum(aggregate_test_100.c9) 
PARTITION BY [aggregate_test_100.c3] ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted] -04)------SortExec: expr=[c3@0 ASC NULLS LAST,c9@1 DESC], preserve_partitioning=[true] +04)------SortExec: expr=[c3@0 ASC NULLS LAST, c9@1 DESC], preserve_partitioning=[true] 05)--------CoalesceBatchesExec: target_batch_size=4096 06)----------RepartitionExec: partitioning=Hash([c3@0], 2), input_partitions=2 07)------------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1 08)--------------ProjectionExec: expr=[c3@1 as c3, c9@2 as c9, sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c3 DESC NULLS FIRST, aggregate_test_100.c9 DESC NULLS FIRST, aggregate_test_100.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@3 as sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c3 DESC NULLS FIRST, aggregate_test_100.c9 DESC NULLS FIRST, aggregate_test_100.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW] 09)----------------BoundedWindowAggExec: wdw=[sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c3 DESC NULLS FIRST, aggregate_test_100.c9 DESC NULLS FIRST, aggregate_test_100.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c3 DESC NULLS FIRST, aggregate_test_100.c9 DESC NULLS FIRST, aggregate_test_100.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int16(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted] -10)------------------SortExec: expr=[c3@1 DESC,c9@2 DESC,c2@0 ASC NULLS 
LAST], preserve_partitioning=[false] +10)------------------SortExec: expr=[c3@1 DESC, c9@2 DESC, c2@0 ASC NULLS LAST], preserve_partitioning=[false] 11)--------------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c2, c3, c9], has_header=true @@ -1991,7 +1991,7 @@ logical_plan 03)----WindowAggr: windowExpr=[[row_number() PARTITION BY [aggregate_test_100.c1] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING]] 04)------TableScan: aggregate_test_100 projection=[c1] physical_plan -01)SortPreservingMergeExec: [c1@0 ASC NULLS LAST,rn1@1 ASC NULLS LAST] +01)SortPreservingMergeExec: [c1@0 ASC NULLS LAST, rn1@1 ASC NULLS LAST] 02)--ProjectionExec: expr=[c1@0 as c1, row_number() PARTITION BY [aggregate_test_100.c1] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING@1 as rn1] 03)----BoundedWindowAggExec: wdw=[row_number() PARTITION BY [aggregate_test_100.c1] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "row_number() PARTITION BY [aggregate_test_100.c1] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }], mode=[Sorted] 04)------SortExec: expr=[c1@0 ASC NULLS LAST], preserve_partitioning=[true] @@ -2023,7 +2023,7 @@ physical_plan 04)------SortPreservingMergeExec: [c9@1 ASC NULLS LAST] 05)--------SortExec: expr=[c9@1 ASC NULLS LAST], preserve_partitioning=[true] 06)----------BoundedWindowAggExec: wdw=[sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 3 FOLLOWING: Ok(Field { name: "sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 3 FOLLOWING", data_type: UInt64, nullable: true, 
dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(3)), is_causal: false }], mode=[Sorted] -07)------------SortExec: expr=[c1@0 ASC NULLS LAST,c9@1 ASC NULLS LAST], preserve_partitioning=[true] +07)------------SortExec: expr=[c1@0 ASC NULLS LAST, c9@1 ASC NULLS LAST], preserve_partitioning=[true] 08)--------------CoalesceBatchesExec: target_batch_size=4096 09)----------------RepartitionExec: partitioning=Hash([c1@0], 2), input_partitions=2 10)------------------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1 @@ -2112,7 +2112,7 @@ physical_plan 05)--------WindowAggExec: wdw=[sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c2, aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c8 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c2, aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c8 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(NULL)), is_causal: false }] 06)----------BoundedWindowAggExec: wdw=[sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c1, aggregate_test_100.c2] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING: Ok(Field { name: "sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c1, aggregate_test_100.c2] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(5)), is_causal: false 
}], mode=[Sorted] 07)------------WindowAggExec: wdw=[sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c1, aggregate_test_100.c2] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c8 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c1, aggregate_test_100.c2] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c8 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(NULL)), is_causal: false }] -08)--------------SortExec: expr=[c1@0 ASC NULLS LAST,c2@1 ASC NULLS LAST,c9@3 ASC NULLS LAST,c8@2 ASC NULLS LAST], preserve_partitioning=[false] +08)--------------SortExec: expr=[c1@0 ASC NULLS LAST, c2@1 ASC NULLS LAST, c9@3 ASC NULLS LAST, c8@2 ASC NULLS LAST], preserve_partitioning=[false] 09)----------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c1, c2, c8, c9], has_header=true @@ -2168,7 +2168,7 @@ physical_plan 06)----------ProjectionExec: expr=[c2@1 as c2, c8@2 as c8, c9@3 as c9, c1_alias@4 as c1_alias, sum(t1.c9) PARTITION BY [t1.c1, t1.c2] ORDER BY [t1.c9 ASC NULLS LAST, t1.c8 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING@5 as sum(t1.c9) PARTITION BY [t1.c1, t1.c2] ORDER BY [t1.c9 ASC NULLS LAST, t1.c8 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING, sum(t1.c9) PARTITION BY [t1.c1, t1.c2] ORDER BY [t1.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING@6 as sum(t1.c9) PARTITION BY [t1.c1, t1.c2] ORDER BY [t1.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING] 07)------------BoundedWindowAggExec: wdw=[sum(t1.c9) PARTITION BY [t1.c1, t1.c2] ORDER BY [t1.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING: Ok(Field { 
name: "sum(t1.c9) PARTITION BY [t1.c1, t1.c2] ORDER BY [t1.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(5)), is_causal: false }], mode=[Sorted] 08)--------------WindowAggExec: wdw=[sum(t1.c9) PARTITION BY [t1.c1, t1.c2] ORDER BY [t1.c9 ASC NULLS LAST, t1.c8 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "sum(t1.c9) PARTITION BY [t1.c1, t1.c2] ORDER BY [t1.c9 ASC NULLS LAST, t1.c8 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(NULL)), is_causal: false }] -09)----------------SortExec: expr=[c1@0 ASC NULLS LAST,c2@1 ASC NULLS LAST,c9@3 ASC NULLS LAST,c8@2 ASC NULLS LAST], preserve_partitioning=[false] +09)----------------SortExec: expr=[c1@0 ASC NULLS LAST, c2@1 ASC NULLS LAST, c9@3 ASC NULLS LAST, c8@2 ASC NULLS LAST], preserve_partitioning=[false] 10)------------------ProjectionExec: expr=[c1@0 as c1, c2@1 as c2, c8@2 as c8, c9@3 as c9, c1@0 as c1_alias] 11)--------------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c1, c2, c8, c9], has_header=true @@ -2211,7 +2211,7 @@ physical_plan 04)------BoundedWindowAggExec: wdw=[sum(aggregate_test_100.c12) ORDER BY [aggregate_test_100.c1 ASC NULLS LAST] GROUPS BETWEEN 5 PRECEDING AND 3 PRECEDING: Ok(Field { name: "sum(aggregate_test_100.c12) ORDER BY [aggregate_test_100.c1 ASC NULLS LAST] GROUPS BETWEEN 5 PRECEDING AND 3 PRECEDING", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Groups, start_bound: Preceding(UInt64(5)), end_bound: Preceding(UInt64(3)), 
is_causal: true }], mode=[Sorted] 05)--------ProjectionExec: expr=[c1@0 as c1, c9@2 as c9, c12@3 as c12, sum(aggregate_test_100.c12) ORDER BY [aggregate_test_100.c1 ASC NULLS LAST, aggregate_test_100.c2 ASC NULLS LAST] GROUPS BETWEEN 1 PRECEDING AND 1 FOLLOWING@4 as sum(aggregate_test_100.c12) ORDER BY [aggregate_test_100.c1 ASC NULLS LAST, aggregate_test_100.c2 ASC NULLS LAST] GROUPS BETWEEN 1 PRECEDING AND 1 FOLLOWING] 06)----------BoundedWindowAggExec: wdw=[sum(aggregate_test_100.c12) ORDER BY [aggregate_test_100.c1 ASC NULLS LAST, aggregate_test_100.c2 ASC NULLS LAST] GROUPS BETWEEN 1 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "sum(aggregate_test_100.c12) ORDER BY [aggregate_test_100.c1 ASC NULLS LAST, aggregate_test_100.c2 ASC NULLS LAST] GROUPS BETWEEN 1 PRECEDING AND 1 FOLLOWING", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Groups, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(1)), is_causal: false }], mode=[Sorted] -07)------------SortExec: expr=[c1@0 ASC NULLS LAST,c2@1 ASC NULLS LAST], preserve_partitioning=[false] +07)------------SortExec: expr=[c1@0 ASC NULLS LAST, c2@1 ASC NULLS LAST], preserve_partitioning=[false] 08)--------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c1, c2, c9, c12], has_header=true query RR @@ -2356,7 +2356,7 @@ logical_plan 03)----WindowAggr: windowExpr=[[row_number() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]] 04)------TableScan: aggregate_test_100 projection=[c9] physical_plan -01)SortExec: TopK(fetch=5), expr=[rn1@1 ASC NULLS LAST,c9@0 ASC NULLS LAST], preserve_partitioning=[false] +01)SortExec: TopK(fetch=5), expr=[rn1@1 ASC NULLS LAST, c9@0 ASC NULLS LAST], preserve_partitioning=[false] 02)--ProjectionExec: expr=[c9@0 as c9, row_number() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED 
PRECEDING AND CURRENT ROW@1 as rn1] 03)----BoundedWindowAggExec: wdw=[row_number() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "row_number() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted] 04)------SortExec: expr=[c9@0 DESC], preserve_partitioning=[false] @@ -3051,15 +3051,15 @@ physical_plan 01)SortExec: TopK(fetch=5), expr=[c@2 ASC NULLS LAST], preserve_partitioning=[false] 02)--ProjectionExec: expr=[a@1 as a, b@2 as b, c@3 as c, sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.b ASC NULLS LAST, annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING@9 as sum1, sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.b ASC NULLS LAST, annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 1 FOLLOWING AND 5 FOLLOWING@10 as sum2, sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.d] ORDER BY [annotated_data_finite2.a ASC NULLS LAST, annotated_data_finite2.b ASC NULLS LAST, annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING@15 as sum3, sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.d] ORDER BY [annotated_data_finite2.a ASC NULLS LAST, annotated_data_finite2.b ASC NULLS LAST, annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 1 PRECEDING@16 as sum4, sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING@5 as sum5, 
sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 5 FOLLOWING@6 as sum6, sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.b, annotated_data_finite2.a] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING@11 as sum7, sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.b, annotated_data_finite2.a] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 5 FOLLOWING@12 as sum8, sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING@7 as sum9, sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND CURRENT ROW@8 as sum10, sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.b, annotated_data_finite2.a, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING@13 as sum11, sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.b, annotated_data_finite2.a, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN CURRENT ROW AND 1 FOLLOWING@14 as sum12] 03)----BoundedWindowAggExec: wdw=[sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.d] ORDER BY [annotated_data_finite2.a ASC NULLS LAST, annotated_data_finite2.b ASC NULLS LAST, annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.d] ORDER BY [annotated_data_finite2.a ASC NULLS LAST, annotated_data_finite2.b ASC NULLS LAST, annotated_data_finite2.c ASC NULLS 
LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(2)), end_bound: Following(UInt64(1)), is_causal: false }, sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.d] ORDER BY [annotated_data_finite2.a ASC NULLS LAST, annotated_data_finite2.b ASC NULLS LAST, annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 1 PRECEDING: Ok(Field { name: "sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.d] ORDER BY [annotated_data_finite2.a ASC NULLS LAST, annotated_data_finite2.b ASC NULLS LAST, annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 1 PRECEDING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(5)), end_bound: Preceding(UInt64(1)), is_causal: true }], mode=[Sorted] -04)------SortExec: expr=[d@4 ASC NULLS LAST,a@1 ASC NULLS LAST,b@2 ASC NULLS LAST,c@3 ASC NULLS LAST], preserve_partitioning=[false] +04)------SortExec: expr=[d@4 ASC NULLS LAST, a@1 ASC NULLS LAST, b@2 ASC NULLS LAST, c@3 ASC NULLS LAST], preserve_partitioning=[false] 05)--------BoundedWindowAggExec: wdw=[sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.b, annotated_data_finite2.a, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.b, annotated_data_finite2.a, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(2)), end_bound: Following(UInt64(1)), is_causal: false }, sum(annotated_data_finite2.c) PARTITION BY 
[annotated_data_finite2.b, annotated_data_finite2.a, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN CURRENT ROW AND 1 FOLLOWING: Ok(Field { name: "sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.b, annotated_data_finite2.a, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN CURRENT ROW AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(1)), is_causal: false }], mode=[Sorted] -06)----------SortExec: expr=[b@2 ASC NULLS LAST,a@1 ASC NULLS LAST,d@4 ASC NULLS LAST,c@3 ASC NULLS LAST], preserve_partitioning=[false] +06)----------SortExec: expr=[b@2 ASC NULLS LAST, a@1 ASC NULLS LAST, d@4 ASC NULLS LAST, c@3 ASC NULLS LAST], preserve_partitioning=[false] 07)------------BoundedWindowAggExec: wdw=[sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.b, annotated_data_finite2.a] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.b, annotated_data_finite2.a] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(2)), end_bound: Following(UInt64(1)), is_causal: false }, sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.b, annotated_data_finite2.a] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 5 FOLLOWING: Ok(Field { name: "sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.b, annotated_data_finite2.a] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 5 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, 
dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(5)), end_bound: Following(UInt64(5)), is_causal: false }], mode=[Sorted] -08)--------------SortExec: expr=[b@2 ASC NULLS LAST,a@1 ASC NULLS LAST,c@3 ASC NULLS LAST], preserve_partitioning=[false] +08)--------------SortExec: expr=[b@2 ASC NULLS LAST, a@1 ASC NULLS LAST, c@3 ASC NULLS LAST], preserve_partitioning=[false] 09)----------------BoundedWindowAggExec: wdw=[sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.b ASC NULLS LAST, annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.b ASC NULLS LAST, annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(2)), end_bound: Following(UInt64(1)), is_causal: false }, sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.b ASC NULLS LAST, annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 1 FOLLOWING AND 5 FOLLOWING: Ok(Field { name: "sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.b ASC NULLS LAST, annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 1 FOLLOWING AND 5 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Following(UInt64(1)), end_bound: Following(UInt64(5)), is_causal: false }], mode=[Sorted] -10)------------------SortExec: expr=[a@1 ASC NULLS LAST,d@4 ASC NULLS LAST,b@2 ASC NULLS LAST,c@3 ASC NULLS LAST], 
preserve_partitioning=[false] +10)------------------SortExec: expr=[a@1 ASC NULLS LAST, d@4 ASC NULLS LAST, b@2 ASC NULLS LAST, c@3 ASC NULLS LAST], preserve_partitioning=[false] 11)--------------------BoundedWindowAggExec: wdw=[sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(2)), end_bound: Following(UInt64(1)), is_causal: false }, sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND CURRENT ROW: Ok(Field { name: "sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND CURRENT ROW", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(5)), end_bound: CurrentRow, is_causal: true }], mode=[Sorted] -12)----------------------SortExec: expr=[a@1 ASC NULLS LAST,b@2 ASC NULLS LAST,d@4 ASC NULLS LAST,c@3 ASC NULLS LAST], preserve_partitioning=[false] +12)----------------------SortExec: expr=[a@1 ASC NULLS LAST, b@2 ASC NULLS LAST, d@4 ASC NULLS LAST, c@3 ASC NULLS LAST], preserve_partitioning=[false] 13)------------------------BoundedWindowAggExec: wdw=[sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, 
annotated_data_finite2.b] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(2)), end_bound: Following(UInt64(1)), is_causal: false }, sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 5 FOLLOWING: Ok(Field { name: "sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 5 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(5)), end_bound: Following(UInt64(5)), is_causal: false }], mode=[Sorted] 14)--------------------------ProjectionExec: expr=[CAST(c@2 AS Int64) as __common_expr_1, a@0 as a, b@1 as b, c@2 as c, d@3 as d] 15)----------------------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a, b, c, d], output_ordering=[a@0 ASC NULLS LAST, b@1 ASC NULLS LAST, c@2 ASC NULLS LAST], has_header=true @@ -3144,7 +3144,7 @@ logical_plan 03)----WindowAggr: windowExpr=[[sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]] 04)------TableScan: aggregate_test_100 projection=[c9] physical_plan -01)SortExec: TopK(fetch=5), expr=[sum1@1 ASC NULLS LAST,c9@0 DESC], preserve_partitioning=[false] +01)SortExec: TopK(fetch=5), expr=[sum1@1 ASC NULLS LAST, c9@0 DESC], preserve_partitioning=[false] 
02)--ProjectionExec: expr=[c9@0 as c9, sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@1 as sum1] 03)----BoundedWindowAggExec: wdw=[sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted] 04)------SortExec: expr=[c9@0 DESC], preserve_partitioning=[false] @@ -3264,17 +3264,17 @@ physical_plan 01)ProjectionExec: expr=[sum(annotated_data_infinite2.a) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.b] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@3 as sum1, sum(annotated_data_infinite2.a) PARTITION BY [annotated_data_infinite2.b, annotated_data_infinite2.a] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@5 as sum2, sum(annotated_data_infinite2.a) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.b ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@4 as sum3, sum(annotated_data_infinite2.a) PARTITION BY [annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@6 as sum4] 02)--BoundedWindowAggExec: wdw=[sum(annotated_data_infinite2.a) PARTITION BY [annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "sum(annotated_data_infinite2.a) PARTITION BY [annotated_data_infinite2.d] ORDER BY 
[annotated_data_infinite2.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Linear] 03)----CoalesceBatchesExec: target_batch_size=4096 -04)------RepartitionExec: partitioning=Hash([d@2], 2), input_partitions=2, preserve_order=true, sort_exprs=__common_expr_1@0 ASC NULLS LAST,a@1 ASC NULLS LAST +04)------RepartitionExec: partitioning=Hash([d@2], 2), input_partitions=2, preserve_order=true, sort_exprs=__common_expr_1@0 ASC NULLS LAST, a@1 ASC NULLS LAST 05)--------ProjectionExec: expr=[__common_expr_1@0 as __common_expr_1, a@1 as a, d@4 as d, sum(annotated_data_infinite2.a) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.b] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@5 as sum(annotated_data_infinite2.a) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.b] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW, sum(annotated_data_infinite2.a) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.b ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@6 as sum(annotated_data_infinite2.a) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.b ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW, sum(annotated_data_infinite2.a) PARTITION BY [annotated_data_infinite2.b, annotated_data_infinite2.a] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@7 as sum(annotated_data_infinite2.a) PARTITION BY [annotated_data_infinite2.b, annotated_data_infinite2.a] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] RANGE BETWEEN UNBOUNDED 
PRECEDING AND CURRENT ROW] 06)----------BoundedWindowAggExec: wdw=[sum(annotated_data_infinite2.a) PARTITION BY [annotated_data_infinite2.b, annotated_data_infinite2.a] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "sum(annotated_data_infinite2.a) PARTITION BY [annotated_data_infinite2.b, annotated_data_infinite2.a] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted] 07)------------CoalesceBatchesExec: target_batch_size=4096 -08)--------------RepartitionExec: partitioning=Hash([b@2, a@1], 2), input_partitions=2, preserve_order=true, sort_exprs=a@1 ASC NULLS LAST,b@2 ASC NULLS LAST,c@3 ASC NULLS LAST,__common_expr_1@0 ASC NULLS LAST +08)--------------RepartitionExec: partitioning=Hash([b@2, a@1], 2), input_partitions=2, preserve_order=true, sort_exprs=a@1 ASC NULLS LAST, b@2 ASC NULLS LAST, c@3 ASC NULLS LAST, __common_expr_1@0 ASC NULLS LAST 09)----------------BoundedWindowAggExec: wdw=[sum(annotated_data_infinite2.a) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.b ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "sum(annotated_data_infinite2.a) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.b ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[PartiallySorted([0])] 10)------------------CoalesceBatchesExec: target_batch_size=4096 
-11)--------------------RepartitionExec: partitioning=Hash([a@1, d@4], 2), input_partitions=2, preserve_order=true, sort_exprs=a@1 ASC NULLS LAST,b@2 ASC NULLS LAST,c@3 ASC NULLS LAST,__common_expr_1@0 ASC NULLS LAST +11)--------------------RepartitionExec: partitioning=Hash([a@1, d@4], 2), input_partitions=2, preserve_order=true, sort_exprs=a@1 ASC NULLS LAST, b@2 ASC NULLS LAST, c@3 ASC NULLS LAST, __common_expr_1@0 ASC NULLS LAST 12)----------------------BoundedWindowAggExec: wdw=[sum(annotated_data_infinite2.a) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.b] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "sum(annotated_data_infinite2.a) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.b] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted] 13)------------------------CoalesceBatchesExec: target_batch_size=4096 -14)--------------------------RepartitionExec: partitioning=Hash([a@1, b@2], 2), input_partitions=2, preserve_order=true, sort_exprs=a@1 ASC NULLS LAST,b@2 ASC NULLS LAST,c@3 ASC NULLS LAST,__common_expr_1@0 ASC NULLS LAST +14)--------------------------RepartitionExec: partitioning=Hash([a@1, b@2], 2), input_partitions=2, preserve_order=true, sort_exprs=a@1 ASC NULLS LAST, b@2 ASC NULLS LAST, c@3 ASC NULLS LAST, __common_expr_1@0 ASC NULLS LAST 15)----------------------------ProjectionExec: expr=[CAST(a@0 AS Int64) as __common_expr_1, a@0 as a, b@1 as b, c@2 as c, d@3 as d] 16)------------------------------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1 17)--------------------------------StreamingTableExec: partition_sizes=1, projection=[a, b, c, d], 
infinite_source=true, output_ordering=[a@0 ASC NULLS LAST, b@1 ASC NULLS LAST, c@2 ASC NULLS LAST] @@ -3624,7 +3624,7 @@ physical_plan 02)--ProjectionExec: expr=[a0@0 as a0, a@1 as a, b@2 as b, c@3 as c, d@4 as d, avg(multiple_ordered_table_inf.d) PARTITION BY [multiple_ordered_table_inf.d] ORDER BY [multiple_ordered_table_inf.a ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND CURRENT ROW@5 as avg_d] 03)----BoundedWindowAggExec: wdw=[avg(multiple_ordered_table_inf.d) PARTITION BY [multiple_ordered_table_inf.d] ORDER BY [multiple_ordered_table_inf.a ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND CURRENT ROW: Ok(Field { name: "avg(multiple_ordered_table_inf.d) PARTITION BY [multiple_ordered_table_inf.d] ORDER BY [multiple_ordered_table_inf.a ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND CURRENT ROW", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(10)), end_bound: CurrentRow, is_causal: false }], mode=[Linear] 04)------CoalesceBatchesExec: target_batch_size=4096 -05)--------RepartitionExec: partitioning=Hash([d@4], 2), input_partitions=2, preserve_order=true, sort_exprs=a@1 ASC NULLS LAST,b@2 ASC NULLS LAST,c@3 ASC NULLS LAST +05)--------RepartitionExec: partitioning=Hash([d@4], 2), input_partitions=2, preserve_order=true, sort_exprs=a@1 ASC NULLS LAST, b@2 ASC NULLS LAST, c@3 ASC NULLS LAST 06)----------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1 07)------------StreamingTableExec: partition_sizes=1, projection=[a0, a, b, c, d], infinite_source=true, output_orderings=[[a@1 ASC NULLS LAST, b@2 ASC NULLS LAST], [c@3 ASC NULLS LAST]] @@ -3679,14 +3679,14 @@ SELECT FROM score_board s ORDER BY team_name, score; ---- -Mongrels Apu 350 1 -Mongrels Ned 666 1 -Mongrels Meg 1030 2 -Mongrels Burns 1270 2 -Simpsons Homer 1 1 -Simpsons Lisa 710 1 -Simpsons Marge 990 2 -Simpsons Bart 2010 2 +Mongrels Apu 350 1 +Mongrels Ned 666 1 +Mongrels Meg 1030 2 
+Mongrels Burns 1270 2 +Simpsons Homer 1 1 +Simpsons Lisa 710 1 +Simpsons Marge 990 2 +Simpsons Bart 2010 2 query TTII SELECT @@ -3697,14 +3697,14 @@ SELECT FROM score_board s ORDER BY score; ---- -Simpsons Homer 1 1 -Mongrels Apu 350 1 -Mongrels Ned 666 1 -Simpsons Lisa 710 1 -Simpsons Marge 990 2 -Mongrels Meg 1030 2 -Mongrels Burns 1270 2 -Simpsons Bart 2010 2 +Simpsons Homer 1 1 +Mongrels Apu 350 1 +Mongrels Ned 666 1 +Simpsons Lisa 710 1 +Simpsons Marge 990 2 +Mongrels Meg 1030 2 +Mongrels Burns 1270 2 +Simpsons Bart 2010 2 query TTII SELECT @@ -3715,14 +3715,14 @@ SELECT FROM score_board s ORDER BY team_name, score; ---- -Mongrels Apu 350 1 -Mongrels Ned 666 2 -Mongrels Meg 1030 3 -Mongrels Burns 1270 4 -Simpsons Homer 1 1 -Simpsons Lisa 710 2 -Simpsons Marge 990 3 -Simpsons Bart 2010 4 +Mongrels Apu 350 1 +Mongrels Ned 666 2 +Mongrels Meg 1030 3 +Mongrels Burns 1270 4 +Simpsons Homer 1 1 +Simpsons Lisa 710 2 +Simpsons Marge 990 3 +Simpsons Bart 2010 4 query TTII SELECT @@ -3733,14 +3733,14 @@ SELECT FROM score_board s ORDER BY team_name, score; ---- -Mongrels Apu 350 1 -Mongrels Ned 666 1 -Mongrels Meg 1030 1 -Mongrels Burns 1270 1 -Simpsons Homer 1 1 -Simpsons Lisa 710 1 -Simpsons Marge 990 1 -Simpsons Bart 2010 1 +Mongrels Apu 350 1 +Mongrels Ned 666 1 +Mongrels Meg 1030 1 +Mongrels Burns 1270 1 +Simpsons Homer 1 1 +Simpsons Lisa 710 1 +Simpsons Marge 990 1 +Simpsons Bart 2010 1 # incorrect number of parameters for ntile query error DataFusion error: Execution error: NTILE requires a positive integer, but finds NULL @@ -4849,10 +4849,10 @@ SELECT nth_value(column2, arrow_cast(2, 'Int32')) OVER (order by column1) FROM t; ---- -3 1 1 4 4 NULL NULL NULL NULL -4 1 1 5 5 3 3 4 4 -5 2 2 6 6 4 4 4 4 -6 2 2 NULL NULL 5 5 4 4 +3 1 1 4 4 NULL NULL NULL NULL +4 1 1 5 5 3 3 4 4 +5 2 2 6 6 4 4 4 4 +6 2 2 NULL NULL 5 5 4 4 # NTILE specifies the argument types so the error is different query error diff --git a/datafusion/substrait/src/logical_plan/consumer.rs 
b/datafusion/substrait/src/logical_plan/consumer.rs index 2aaf8ec0aa06..890da7361d7c 100644 --- a/datafusion/substrait/src/logical_plan/consumer.rs +++ b/datafusion/substrait/src/logical_plan/consumer.rs @@ -33,6 +33,7 @@ use datafusion::logical_expr::{ expr::find_df_window_func, Aggregate, BinaryExpr, Case, EmptyRelation, Expr, ExprSchemable, LogicalPlan, Operator, Projection, SortExpr, Values, }; +use substrait::proto::aggregate_rel::Grouping; use substrait::proto::expression::subquery::set_predicate::PredicateOp; use substrait::proto::expression_reference::ExprType; use url::Url; @@ -56,6 +57,7 @@ use datafusion::arrow::array::{new_empty_array, AsArray}; use datafusion::arrow::temporal_conversions::NANOSECONDS; use datafusion::common::scalar::ScalarStructBuilder; use datafusion::dataframe::DataFrame; +use datafusion::logical_expr::builder::project; use datafusion::logical_expr::expr::InList; use datafusion::logical_expr::{ col, expr, Cast, Extension, GroupingSet, Like, LogicalPlanBuilder, Partitioning, @@ -80,6 +82,7 @@ use substrait::proto::expression::literal::{ use substrait::proto::expression::subquery::SubqueryType; use substrait::proto::expression::{FieldReference, Literal, ScalarFunction}; use substrait::proto::read_rel::local_files::file_or_files::PathType::UriFile; +use substrait::proto::rel_common::{Emit, EmitKind}; use substrait::proto::{ aggregate_function::AggregationInvocation, expression::{ @@ -93,9 +96,9 @@ use substrait::proto::{ join_rel, plan_rel, r#type, read_rel::ReadType, rel::RelType, - set_rel, + rel_common, set_rel, sort_field::{SortDirection, SortKind::*}, - AggregateFunction, Expression, NamedStruct, Plan, Rel, Type, + AggregateFunction, Expression, NamedStruct, Plan, Rel, RelCommon, Type, }; use substrait::proto::{ExtendedExpression, FunctionArgument, SortField}; @@ -562,42 +565,51 @@ pub async fn from_substrait_rel( rel: &Rel, extensions: &Extensions, ) -> Result { - match &rel.rel_type { + let plan: Result = match &rel.rel_type { 
Some(RelType::Project(p)) => { if let Some(input) = p.input.as_ref() { let mut input = LogicalPlanBuilder::from( from_substrait_rel(ctx, input, extensions).await?, ); - let mut names: HashSet = HashSet::new(); - let mut exprs: Vec = vec![]; - for e in &p.expressions { - let x = - from_substrait_rex(ctx, e, input.clone().schema(), extensions) + let original_schema = input.schema().clone(); + + // Ensure that all expressions have a unique display name, so that + // validate_unique_names does not fail when constructing the project. + let mut name_tracker = NameTracker::new(); + + // By default, a Substrait Project emits all inputs fields followed by all expressions. + // We build the explicit expressions first, and then the input expressions to avoid + // adding aliases to the explicit expressions (as part of ensuring unique names). + // + // This is helpful for plan visualization and tests, because when DataFusion produces + // Substrait Projects it adds an output mapping that excludes all input columns + // leaving only explicit expressions. + + let mut explicit_exprs: Vec = vec![]; + for expr in &p.expressions { + let e = + from_substrait_rex(ctx, expr, input.clone().schema(), extensions) .await?; // if the expression is WindowFunction, wrap in a Window relation - if let Expr::WindowFunction(_) = &x { + if let Expr::WindowFunction(_) = &e { // Adding the same expression here and in the project below // works because the project's builder uses columnize_expr(..) // to transform it into a column reference - input = input.window(vec![x.clone()])? - } - // Ensure the expression has a unique display name, so that project's - // validate_unique_names doesn't fail - let name = x.schema_name().to_string(); - let mut new_name = name.clone(); - let mut i = 0; - while names.contains(&new_name) { - new_name = format!("{}__temp__{}", name, i); - i += 1; + input = input.window(vec![e.clone()])? 
} - if new_name != name { - exprs.push(x.alias(new_name.clone())); - } else { - exprs.push(x); - } - names.insert(new_name); + explicit_exprs.push(name_tracker.get_uniquely_named_expr(e)?); + } + + let mut final_exprs: Vec = vec![]; + for index in 0..original_schema.fields().len() { + let e = Expr::Column(Column::from( + original_schema.qualified_field(index), + )); + final_exprs.push(name_tracker.get_uniquely_named_expr(e)?); } - input.project(exprs)?.build() + final_exprs.append(&mut explicit_exprs); + + input.project(final_exprs)?.build() } else { not_impl_err!("Projection without an input is not supported") } @@ -654,39 +666,48 @@ pub async fn from_substrait_rel( let input = LogicalPlanBuilder::from( from_substrait_rel(ctx, input, extensions).await?, ); - let mut group_expr = vec![]; - let mut aggr_expr = vec![]; + let mut ref_group_exprs = vec![]; + + for e in &agg.grouping_expressions { + let x = + from_substrait_rex(ctx, e, input.schema(), extensions).await?; + ref_group_exprs.push(x); + } + + let mut group_exprs = vec![]; + let mut aggr_exprs = vec![]; match agg.groupings.len() { 1 => { - for e in &agg.groupings[0].grouping_expressions { - let x = - from_substrait_rex(ctx, e, input.schema(), extensions) - .await?; - group_expr.push(x); - } + group_exprs.extend_from_slice( + &from_substrait_grouping( + ctx, + &agg.groupings[0], + &ref_group_exprs, + input.schema(), + extensions, + ) + .await?, + ); } _ => { let mut grouping_sets = vec![]; for grouping in &agg.groupings { - let mut grouping_set = vec![]; - for e in &grouping.grouping_expressions { - let x = from_substrait_rex( - ctx, - e, - input.schema(), - extensions, - ) - .await?; - grouping_set.push(x); - } + let grouping_set = from_substrait_grouping( + ctx, + grouping, + &ref_group_exprs, + input.schema(), + extensions, + ) + .await?; grouping_sets.push(grouping_set); } // Single-element grouping expression of type Expr::GroupingSet. 
// Note that GroupingSet::Rollup would become GroupingSet::GroupingSets, when // parsed by the producer and consumer, since Substrait does not have a type dedicated // to ROLLUP. Only vector of Groupings (grouping sets) is available. - group_expr.push(Expr::GroupingSet(GroupingSet::GroupingSets( + group_exprs.push(Expr::GroupingSet(GroupingSet::GroupingSets( grouping_sets, ))); } @@ -744,9 +765,9 @@ pub async fn from_substrait_rel( "Aggregate without aggregate function is not supported" ), }; - aggr_expr.push(agg_func?.as_ref().clone()); + aggr_exprs.push(agg_func?.as_ref().clone()); } - input.aggregate(group_expr, aggr_expr)?.build() + input.aggregate(group_exprs, aggr_exprs)?.build() } else { not_impl_err!("Aggregate without an input is not valid") } @@ -1074,6 +1095,138 @@ pub async fn from_substrait_rel( })) } _ => not_impl_err!("Unsupported RelType: {:?}", rel.rel_type), + }; + apply_emit_kind(retrieve_rel_common(rel), plan?) +} + +fn retrieve_rel_common(rel: &Rel) -> Option<&RelCommon> { + match rel.rel_type.as_ref() { + None => None, + Some(rt) => match rt { + RelType::Read(r) => r.common.as_ref(), + RelType::Filter(f) => f.common.as_ref(), + RelType::Fetch(f) => f.common.as_ref(), + RelType::Aggregate(a) => a.common.as_ref(), + RelType::Sort(s) => s.common.as_ref(), + RelType::Join(j) => j.common.as_ref(), + RelType::Project(p) => p.common.as_ref(), + RelType::Set(s) => s.common.as_ref(), + RelType::ExtensionSingle(e) => e.common.as_ref(), + RelType::ExtensionMulti(e) => e.common.as_ref(), + RelType::ExtensionLeaf(e) => e.common.as_ref(), + RelType::Cross(c) => c.common.as_ref(), + RelType::Reference(_) => None, + RelType::Write(w) => w.common.as_ref(), + RelType::Ddl(d) => d.common.as_ref(), + RelType::HashJoin(j) => j.common.as_ref(), + RelType::MergeJoin(j) => j.common.as_ref(), + RelType::NestedLoopJoin(j) => j.common.as_ref(), + RelType::Window(w) => w.common.as_ref(), + RelType::Exchange(e) => e.common.as_ref(), + RelType::Expand(e) => 
e.common.as_ref(), + }, + } +} + +fn retrieve_emit_kind(rel_common: Option<&RelCommon>) -> EmitKind { + // the default EmitKind is Direct if it is not set explicitly + let default = EmitKind::Direct(rel_common::Direct {}); + rel_common + .and_then(|rc| rc.emit_kind.as_ref()) + .map_or(default, |ek| ek.clone()) +} + +fn contains_volatile_expr(proj: &Projection) -> bool { + proj.expr.iter().any(|e| e.is_volatile()) +} + +fn apply_emit_kind( + rel_common: Option<&RelCommon>, + plan: LogicalPlan, +) -> Result { + match retrieve_emit_kind(rel_common) { + EmitKind::Direct(_) => Ok(plan), + EmitKind::Emit(Emit { output_mapping }) => { + // It is valid to reference the same field multiple times in the Emit + // In this case, we need to provide unique names to avoid collisions + let mut name_tracker = NameTracker::new(); + match plan { + // To avoid adding a projection on top of a projection, we apply special case + // handling to flatten Substrait Emits. This is only applicable if none of the + // expressions in the projection are volatile. This is to avoid issues like + // converting a single call of the random() function into multiple calls due to + // duplicate fields in the output_mapping. 
+ LogicalPlan::Projection(proj) if !contains_volatile_expr(&proj) => { + let mut exprs: Vec = vec![]; + for field in output_mapping { + let expr = proj.expr + .get(field as usize) + .ok_or_else(|| substrait_datafusion_err!( + "Emit output field {} cannot be resolved in input schema {}", + field, proj.input.schema().clone() + ))?; + exprs.push(name_tracker.get_uniquely_named_expr(expr.clone())?); + } + + let input = Arc::unwrap_or_clone(proj.input); + project(input, exprs) + } + // Otherwise we just handle the output_mapping as a projection + _ => { + let input_schema = plan.schema(); + + let mut exprs: Vec = vec![]; + for index in output_mapping.into_iter() { + let column = Expr::Column(Column::from( + input_schema.qualified_field(index as usize), + )); + let expr = name_tracker.get_uniquely_named_expr(column)?; + exprs.push(expr); + } + + project(plan, exprs) + } + } + } + } +} + +struct NameTracker { + seen_names: HashSet, +} + +enum NameTrackerStatus { + NeverSeen, + SeenBefore, +} + +impl NameTracker { + fn new() -> Self { + NameTracker { + seen_names: HashSet::default(), + } + } + fn get_unique_name(&mut self, name: String) -> (String, NameTrackerStatus) { + match self.seen_names.insert(name.clone()) { + true => (name, NameTrackerStatus::NeverSeen), + false => { + let mut counter = 0; + loop { + let candidate_name = format!("{}__temp__{}", name, counter); + if self.seen_names.insert(candidate_name.clone()) { + return (candidate_name, NameTrackerStatus::SeenBefore); + } + counter += 1; + } + } + } + } + + fn get_uniquely_named_expr(&mut self, expr: Expr) -> Result { + match self.get_unique_name(expr.name_for_alias()?) 
{ + (_, NameTrackerStatus::NeverSeen) => Ok(expr), + (name, NameTrackerStatus::SeenBefore) => Ok(expr.alias(name)), + } } } @@ -1226,6 +1379,7 @@ fn from_substrait_jointype(join_type: i32) -> Result { join_rel::JoinType::Outer => Ok(JoinType::Full), join_rel::JoinType::LeftAnti => Ok(JoinType::LeftAnti), join_rel::JoinType::LeftSemi => Ok(JoinType::LeftSemi), + join_rel::JoinType::LeftMark => Ok(JoinType::LeftMark), _ => plan_err!("unsupported join type {substrait_join_type:?}"), } } else { @@ -2618,6 +2772,29 @@ fn from_substrait_null( } } +#[allow(deprecated)] +async fn from_substrait_grouping( + ctx: &SessionContext, + grouping: &Grouping, + expressions: &[Expr], + input_schema: &DFSchemaRef, + extensions: &Extensions, +) -> Result> { + let mut group_exprs = vec![]; + if !grouping.grouping_expressions.is_empty() { + for e in &grouping.grouping_expressions { + let expr = from_substrait_rex(ctx, e, input_schema, extensions).await?; + group_exprs.push(expr); + } + return Ok(group_exprs); + } + for idx in &grouping.expression_references { + let e = &expressions[*idx as usize]; + group_exprs.push(e.clone()); + } + Ok(group_exprs) +} + fn from_substrait_field_reference( field_ref: &FieldReference, input_schema: &DFSchema, diff --git a/datafusion/substrait/src/logical_plan/producer.rs b/datafusion/substrait/src/logical_plan/producer.rs index 408885f70687..4d864e4334ce 100644 --- a/datafusion/substrait/src/logical_plan/producer.rs +++ b/datafusion/substrait/src/logical_plan/producer.rs @@ -361,7 +361,7 @@ pub fn to_substrait_rel( } LogicalPlan::Aggregate(agg) => { let input = to_substrait_rel(agg.input.as_ref(), ctx, extensions)?; - let groupings = to_substrait_groupings( + let (grouping_expressions, groupings) = to_substrait_groupings( ctx, &agg.group_expr, agg.input.schema(), @@ -377,7 +377,7 @@ pub fn to_substrait_rel( rel_type: Some(RelType::Aggregate(Box::new(AggregateRel { common: None, input: Some(input), - grouping_expressions: vec![], + grouping_expressions, 
groupings, measures, advanced_extension: None, @@ -725,7 +725,10 @@ fn to_substrait_jointype(join_type: JoinType) -> join_rel::JoinType { JoinType::Full => join_rel::JoinType::Outer, JoinType::LeftAnti => join_rel::JoinType::LeftAnti, JoinType::LeftSemi => join_rel::JoinType::LeftSemi, - JoinType::RightAnti | JoinType::RightSemi => unimplemented!(), + JoinType::LeftMark => join_rel::JoinType::LeftMark, + JoinType::RightAnti | JoinType::RightSemi => { + unimplemented!() + } } } @@ -771,14 +774,20 @@ pub fn parse_flat_grouping_exprs( exprs: &[Expr], schema: &DFSchemaRef, extensions: &mut Extensions, + ref_group_exprs: &mut Vec, ) -> Result { - let grouping_expressions = exprs - .iter() - .map(|e| to_substrait_rex(ctx, e, schema, 0, extensions)) - .collect::>>()?; + let mut expression_references = vec![]; + let mut grouping_expressions = vec![]; + + for e in exprs { + let rex = to_substrait_rex(ctx, e, schema, 0, extensions)?; + grouping_expressions.push(rex.clone()); + ref_group_exprs.push(rex); + expression_references.push((ref_group_exprs.len() - 1) as u32); + } Ok(Grouping { grouping_expressions, - expression_references: vec![], + expression_references, }) } @@ -787,8 +796,9 @@ pub fn to_substrait_groupings( exprs: &[Expr], schema: &DFSchemaRef, extensions: &mut Extensions, -) -> Result> { - match exprs.len() { +) -> Result<(Vec, Vec)> { + let mut ref_group_exprs = vec![]; + let groupings = match exprs.len() { 1 => match &exprs[0] { Expr::GroupingSet(gs) => match gs { GroupingSet::Cube(_) => Err(DataFusionError::NotImplemented( @@ -796,7 +806,15 @@ pub fn to_substrait_groupings( )), GroupingSet::GroupingSets(sets) => Ok(sets .iter() - .map(|set| parse_flat_grouping_exprs(ctx, set, schema, extensions)) + .map(|set| { + parse_flat_grouping_exprs( + ctx, + set, + schema, + extensions, + &mut ref_group_exprs, + ) + }) .collect::>>()?), GroupingSet::Rollup(set) => { let mut sets: Vec> = vec![vec![]]; @@ -807,19 +825,34 @@ pub fn to_substrait_groupings( .iter() .rev() 
.map(|set| { - parse_flat_grouping_exprs(ctx, set, schema, extensions) + parse_flat_grouping_exprs( + ctx, + set, + schema, + extensions, + &mut ref_group_exprs, + ) }) .collect::>>()?) } }, _ => Ok(vec![parse_flat_grouping_exprs( - ctx, exprs, schema, extensions, + ctx, + exprs, + schema, + extensions, + &mut ref_group_exprs, )?]), }, _ => Ok(vec![parse_flat_grouping_exprs( - ctx, exprs, schema, extensions, + ctx, + exprs, + schema, + extensions, + &mut ref_group_exprs, )?]), - } + }?; + Ok((ref_group_exprs, groupings)) } #[allow(deprecated)] diff --git a/datafusion/substrait/tests/cases/emit_kind_tests.rs b/datafusion/substrait/tests/cases/emit_kind_tests.rs new file mode 100644 index 000000000000..ac66177ed796 --- /dev/null +++ b/datafusion/substrait/tests/cases/emit_kind_tests.rs @@ -0,0 +1,127 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! 
Tests for Emit Kind usage + +#[cfg(test)] +mod tests { + use crate::utils::test::{add_plan_schemas_to_ctx, read_json}; + + use datafusion::common::Result; + use datafusion::execution::SessionStateBuilder; + use datafusion::prelude::{CsvReadOptions, SessionConfig, SessionContext}; + use datafusion_substrait::logical_plan::consumer::from_substrait_plan; + use datafusion_substrait::logical_plan::producer::to_substrait_plan; + + #[tokio::test] + async fn project_respects_direct_emit_kind() -> Result<()> { + let proto_plan = read_json( + "tests/testdata/test_plans/emit_kind/direct_on_project.substrait.json", + ); + let ctx = add_plan_schemas_to_ctx(SessionContext::new(), &proto_plan)?; + let plan = from_substrait_plan(&ctx, &proto_plan).await?; + + let plan_str = format!("{}", plan); + + assert_eq!( + plan_str, + "Projection: DATA.A AS a, DATA.B AS b, DATA.A + Int64(1) AS add1\ + \n TableScan: DATA" + ); + Ok(()) + } + + #[tokio::test] + async fn handle_emit_as_project() -> Result<()> { + let proto_plan = read_json( + "tests/testdata/test_plans/emit_kind/emit_on_filter.substrait.json", + ); + let ctx = add_plan_schemas_to_ctx(SessionContext::new(), &proto_plan)?; + let plan = from_substrait_plan(&ctx, &proto_plan).await?; + + let plan_str = format!("{}", plan); + + assert_eq!( + plan_str, + // Note that duplicate references in the remap are aliased + "Projection: DATA.B, DATA.A AS A1, DATA.A AS DATA.A__temp__0 AS A2\ + \n Filter: DATA.B = Int64(2)\ + \n TableScan: DATA" + ); + Ok(()) + } + + async fn make_context() -> Result { + let state = SessionStateBuilder::new() + .with_config(SessionConfig::default()) + .with_default_features() + .build(); + let ctx = SessionContext::new_with_state(state); + ctx.register_csv("data", "tests/testdata/data.csv", CsvReadOptions::default()) + .await?; + Ok(ctx) + } + + #[tokio::test] + async fn handle_emit_as_project_with_volatile_expr() -> Result<()> { + let ctx = make_context().await?; + + let df = ctx + .sql("SELECT random() AS c1, 
a + 1 AS c2 FROM data") + .await?; + + let plan = df.into_unoptimized_plan(); + assert_eq!( + format!("{}", plan), + "Projection: random() AS c1, data.a + Int64(1) AS c2\ + \n TableScan: data" + ); + + let proto = to_substrait_plan(&plan, &ctx)?; + let plan2 = from_substrait_plan(&ctx, &proto).await?; + // note how the Projections are not flattened + assert_eq!( + format!("{}", plan2), + "Projection: random() AS c1, data.a + Int64(1) AS c2\ + \n Projection: data.a, data.b, data.c, data.d, data.e, data.f, random(), data.a + Int64(1)\ + \n TableScan: data" + ); + Ok(()) + } + + #[tokio::test] + async fn handle_emit_as_project_without_volatile_exprs() -> Result<()> { + let ctx = make_context().await?; + let df = ctx.sql("SELECT a + 1, b + 2 FROM data").await?; + + let plan = df.into_unoptimized_plan(); + assert_eq!( + format!("{}", plan), + "Projection: data.a + Int64(1), data.b + Int64(2)\ + \n TableScan: data" + ); + + let proto = to_substrait_plan(&plan, &ctx)?; + let plan2 = from_substrait_plan(&ctx, &proto).await?; + + let plan1str = format!("{plan}"); + let plan2str = format!("{plan2}"); + assert_eq!(plan1str, plan2str); + + Ok(()) + } +} diff --git a/datafusion/substrait/tests/cases/mod.rs b/datafusion/substrait/tests/cases/mod.rs index 42aa23626106..b1f4b95df66f 100644 --- a/datafusion/substrait/tests/cases/mod.rs +++ b/datafusion/substrait/tests/cases/mod.rs @@ -16,6 +16,7 @@ // under the License. 
mod consumer_integration; +mod emit_kind_tests; mod function_test; mod logical_plans; mod roundtrip_logical_plan; diff --git a/datafusion/substrait/tests/cases/roundtrip_logical_plan.rs b/datafusion/substrait/tests/cases/roundtrip_logical_plan.rs index 04530dd34d4b..5687c9af540a 100644 --- a/datafusion/substrait/tests/cases/roundtrip_logical_plan.rs +++ b/datafusion/substrait/tests/cases/roundtrip_logical_plan.rs @@ -453,15 +453,15 @@ async fn roundtrip_inlist_5() -> Result<()> { // on roundtrip there is an additional projection during TableScan which includes all column of the table, // using assert_expected_plan here as a workaround assert_expected_plan( - "SELECT a, f FROM data WHERE (f IN ('a', 'b', 'c') OR a in (SELECT data2.a FROM data2 WHERE f IN ('b', 'c', 'd')))", - "Projection: data.a, data.f\ - \n Filter: data.f = Utf8(\"a\") OR data.f = Utf8(\"b\") OR data.f = Utf8(\"c\") OR Boolean(true) IS NOT NULL\ - \n Projection: data.a, data.f, Boolean(true)\ - \n Left Join: data.a = data2.a\ - \n TableScan: data projection=[a, f]\ - \n Projection: data2.a, Boolean(true)\ - \n Filter: data2.f = Utf8(\"b\") OR data2.f = Utf8(\"c\") OR data2.f = Utf8(\"d\")\ - \n TableScan: data2 projection=[a, f], partial_filters=[data2.f = Utf8(\"b\") OR data2.f = Utf8(\"c\") OR data2.f = Utf8(\"d\")]", + "SELECT a, f FROM data WHERE (f IN ('a', 'b', 'c') OR a in (SELECT data2.a FROM data2 WHERE f IN ('b', 'c', 'd')))", + + "Projection: data.a, data.f\ + \n Filter: data.f = Utf8(\"a\") OR data.f = Utf8(\"b\") OR data.f = Utf8(\"c\") OR data2.mark\ + \n LeftMark Join: data.a = data2.a\ + \n TableScan: data projection=[a, f]\ + \n Projection: data2.a\ + \n Filter: data2.f = Utf8(\"b\") OR data2.f = Utf8(\"c\") OR data2.f = Utf8(\"d\")\ + \n TableScan: data2 projection=[a, f], partial_filters=[data2.f = Utf8(\"b\") OR data2.f = Utf8(\"c\") OR data2.f = Utf8(\"d\")]", true).await } @@ -665,6 +665,19 @@ async fn aggregate_wo_projection_consume() -> Result<()> { .await } +#[tokio::test] 
+async fn aggregate_wo_projection_group_expression_ref_consume() -> Result<()> { + let proto_plan = + read_json("tests/testdata/test_plans/aggregate_no_project_group_expression_ref.substrait.json"); + + assert_expected_plan_substrait( + proto_plan, + "Aggregate: groupBy=[[data.a]], aggr=[[count(data.a) AS countA]]\ + \n TableScan: data projection=[a]", + ) + .await +} + #[tokio::test] async fn aggregate_wo_projection_sorted_consume() -> Result<()> { let proto_plan = diff --git a/datafusion/substrait/tests/testdata/test_plans/aggregate_no_project_group_expression_ref.substrait.json b/datafusion/substrait/tests/testdata/test_plans/aggregate_no_project_group_expression_ref.substrait.json new file mode 100644 index 000000000000..b6f14afd6fa9 --- /dev/null +++ b/datafusion/substrait/tests/testdata/test_plans/aggregate_no_project_group_expression_ref.substrait.json @@ -0,0 +1,98 @@ +{ + "extensionUris": [ + { + "uri": "https://github.com/substrait-io/substrait/blob/main/extensions/functions_aggregate_generic.yaml" + } + ], + "extensions": [ + { + "extensionFunction": { + "functionAnchor": 185, + "name": "count:any" + } + } + ], + "relations": [ + { + "root": { + "input": { + "aggregate": { + "input": { + "read": { + "common": { + "direct": {} + }, + "baseSchema": { + "names": [ + "a" + ], + "struct": { + "types": [ + { + "i64": { + "nullability": "NULLABILITY_NULLABLE" + } + } + ], + "nullability": "NULLABILITY_NULLABLE" + } + }, + "namedTable": { + "names": [ + "data" + ] + } + } + }, + "grouping_expressions": [ + { + "selection": { + "directReference": { + "structField": {} + }, + "rootReference": {} + } + } + ], + "groupings": [ + { + "expression_references": [0] + } + ], + "measures": [ + { + "measure": { + "functionReference": 185, + "phase": "AGGREGATION_PHASE_INITIAL_TO_RESULT", + "outputType": { + "i64": {} + }, + "arguments": [ + { + "value": { + "selection": { + "directReference": { + "structField": {} + }, + "rootReference": {} + } + } + } + ] + } + } + ] + } 
+ }, + "names": [ + "a", + "countA" + ] + } + } + ], + "version": { + "minorNumber": 54, + "producer": "subframe" + } +} \ No newline at end of file diff --git a/datafusion/substrait/tests/testdata/test_plans/emit_kind/direct_on_project.substrait.json b/datafusion/substrait/tests/testdata/test_plans/emit_kind/direct_on_project.substrait.json new file mode 100644 index 000000000000..63b275e1723f --- /dev/null +++ b/datafusion/substrait/tests/testdata/test_plans/emit_kind/direct_on_project.substrait.json @@ -0,0 +1,90 @@ +{ + "extensionUris": [{ + "extensionUriAnchor": 1, + "uri": "/functions_arithmetic.yaml" + }], + "extensions": [{ + "extensionFunction": { + "extensionUriReference": 1, + "functionAnchor": 0, + "name": "add:i64_i64" + } + }], + "relations": [{ + "root": { + "input": { + "project": { + "common": { + "direct": { + } + }, + "input": { + "read": { + "common": { + "direct": { + } + }, + "baseSchema": { + "names": ["A", "B"], + "struct": { + "types": [{ + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }], + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "namedTable": { + "names": ["DATA"] + } + } + }, + "expressions": [{ + "scalarFunction": { + "functionReference": 0, + "args": [], + "outputType": { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 0 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "literal": { + "i64": 1, + "nullable": false, + "typeVariationReference": 0 + } + } + }], + "options": [] + } + }] + } + }, + "names": ["a", "b", "add1"] + } + }], + "expectedTypeUrls": [] +} \ No newline at end of file diff --git a/datafusion/substrait/tests/testdata/test_plans/emit_kind/emit_on_filter.substrait.json 
b/datafusion/substrait/tests/testdata/test_plans/emit_kind/emit_on_filter.substrait.json new file mode 100644 index 000000000000..2fc970155955 --- /dev/null +++ b/datafusion/substrait/tests/testdata/test_plans/emit_kind/emit_on_filter.substrait.json @@ -0,0 +1,91 @@ +{ + "extensionUris": [{ + "extensionUriAnchor": 1, + "uri": "/functions_comparison.yaml" + }], + "extensions": [{ + "extensionFunction": { + "extensionUriReference": 1, + "functionAnchor": 0, + "name": "equal:any_any" + } + }], + "relations": [{ + "root": { + "input": { + "filter": { + "common": { + "emit": { + "outputMapping": [1, 0, 0] + } + }, + "input": { + "read": { + "common": { + "direct": { + } + }, + "baseSchema": { + "names": ["A", "B"], + "struct": { + "types": [{ + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }], + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "namedTable": { + "names": ["DATA"] + } + } + }, + "condition": { + "scalarFunction": { + "functionReference": 0, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 1 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "literal": { + "i64": "2", + "nullable": false, + "typeVariationReference": 0 + } + } + }], + "options": [] + } + } + } + }, + "names": ["B", "A1", "A2"] + } + }], + "expectedTypeUrls": [] +} \ No newline at end of file diff --git a/dev/changelog/42.2.0.md b/dev/changelog/42.2.0.md new file mode 100644 index 000000000000..6c907162c65e --- /dev/null +++ b/dev/changelog/42.2.0.md @@ -0,0 +1,37 @@ + + +# Apache DataFusion 42.2.0 Changelog + +This release consists of 1 commit from 1 contributor. See credits at the end of this changelog for more information.
+ +**Other:** + +- +- Backport config option `skip_physical_aggregate_schema_check` #13176 to 42 [#13189](https://github.com/apache/datafusion/pull/13189) (alamb) + +## Credits + +Thank you to everyone who contributed to this release. Here is a breakdown of commits (PRs merged) per contributor. + +``` + 1 Andrew Lamb +``` + +Thank you also to everyone who contributed in other ways such as filing issues, reviewing PRs, and providing feedback on this release. diff --git a/docs/source/contributor-guide/howtos.md b/docs/source/contributor-guide/howtos.md index 4e52a2fbcaa6..f105ab2c42db 100644 --- a/docs/source/contributor-guide/howtos.md +++ b/docs/source/contributor-guide/howtos.md @@ -45,6 +45,8 @@ Below is a checklist of what you need to do to add a new scalar function to Data - In [sqllogictest/test_files], add new `sqllogictest` integration tests where the function is called through SQL against well known data and returns the expected result. - Documentation for `sqllogictest` [here](https://github.com/apache/datafusion/blob/main/datafusion/sqllogictest/README.md) - Add SQL reference documentation [here](https://github.com/apache/datafusion/blob/main/docs/source/user-guide/sql/scalar_functions.md) + - An example of this being done can be seen [here](https://github.com/apache/datafusion/pull/12775) + - Run `./dev/update_function_docs.sh` to update docs [advanced_udf.rs]: https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/advanced_udaf.rs [sqllogictest/test_files]: https://github.com/apache/datafusion/tree/main/datafusion/sqllogictest/test_files @@ -64,6 +66,8 @@ Below is a checklist of what you need to do to add a new aggregate function to D - In [sqllogictest/test_files], add new `sqllogictest` integration tests where the function is called through SQL against well known data and returns the expected result. 
- Documentation for `sqllogictest` [here](https://github.com/apache/datafusion/blob/main/datafusion/sqllogictest/README.md) - Add SQL reference documentation [here](https://github.com/apache/datafusion/blob/main/docs/source/user-guide/sql/aggregate_functions.md) + - An example of this being done can be seen [here](https://github.com/apache/datafusion/pull/12775) + - Run `./dev/update_function_docs.sh` to update docs ## How to display plans graphically diff --git a/docs/source/contributor-guide/testing.md b/docs/source/contributor-guide/testing.md index 90e39c0057c1..b955b09050b3 100644 --- a/docs/source/contributor-guide/testing.md +++ b/docs/source/contributor-guide/testing.md @@ -21,11 +21,27 @@ Tests are critical to ensure that DataFusion is working properly and is not accidentally broken during refactorings. All new features -should have test coverage. +should have test coverage and the entire test suite is run as part of CI. -DataFusion has several levels of tests in its [Test -Pyramid](https://martinfowler.com/articles/practical-test-pyramid.html) -and tries to follow the Rust standard [Testing Organization](https://doc.rust-lang.org/book/ch11-03-test-organization.html) in the The Book. +DataFusion has several levels of tests in its [Test Pyramid] and tries to follow +the Rust standard [Testing Organization] described in [The Book]. + +Run tests using `cargo`: + +```shell +cargo test +``` + +You can also use other runners such as [cargo-nextest]. 
+ +```shell +cargo nextest run +``` + +[test pyramid]: https://martinfowler.com/articles/practical-test-pyramid.html +[testing organization]: https://doc.rust-lang.org/book/ch11-03-test-organization.html +[the book]: https://doc.rust-lang.org/book/ +[cargo-nextest]: https://nexte.st/ ## Unit tests @@ -34,7 +50,7 @@ The [test_util](https://github.com/apache/datafusion/tree/main/datafusion/common ## sqllogictests Tests -DataFusion's SQL implementation is tested using [sqllogictest](https://github.com/apache/datafusion/tree/main/datafusion/sqllogictest) which are run like any other Rust test using `cargo test --test sqllogictests`. +DataFusion's SQL implementation is tested using [sqllogictest](https://github.com/apache/datafusion/tree/main/datafusion/sqllogictest) which are run like other tests using `cargo test --test sqllogictests`. `sqllogictests` tests may be less convenient for new contributors who are familiar with writing `.rs` tests as they require learning another tool. However, `sqllogictest` based tests are much easier to develop and maintain as they 1) do not require a slow recompile/link cycle and 2) can be automatically updated via `cargo test --test sqllogictests -- --complete`. diff --git a/docs/source/user-guide/configs.md b/docs/source/user-guide/configs.md index 91a2e8b4389a..bd8591b5d723 100644 --- a/docs/source/user-guide/configs.md +++ b/docs/source/user-guide/configs.md @@ -56,7 +56,7 @@ Environment variables are read during `SessionConfig` initialisation so they mus | datafusion.execution.parquet.metadata_size_hint | NULL | (reading) If specified, the parquet reader will try and fetch the last `size_hint` bytes of the parquet file optimistically. 
If not specified, two reads are required: One read to fetch the 8-byte parquet footer and another to fetch the metadata length encoded in the footer | | datafusion.execution.parquet.pushdown_filters | false | (reading) If true, filter expressions are be applied during the parquet decoding operation to reduce the number of rows decoded. This optimization is sometimes called "late materialization". | | datafusion.execution.parquet.reorder_filters | false | (reading) If true, filter expressions evaluated during the parquet decoding operation will be reordered heuristically to minimize the cost of evaluation. If false, the filters are applied in the same order as written in the query | -| datafusion.execution.parquet.schema_force_view_types | false | (reading) If true, parquet reader will read columns of `Utf8/Utf8Large` with `Utf8View`, and `Binary/BinaryLarge` with `BinaryView`. | +| datafusion.execution.parquet.schema_force_view_types | true | (reading) If true, parquet reader will read columns of `Utf8/Utf8Large` with `Utf8View`, and `Binary/BinaryLarge` with `BinaryView`. | | datafusion.execution.parquet.binary_as_string | false | (reading) If true, parquet reader will read columns of `Binary/LargeBinary` with `Utf8`, and `BinaryView` with `Utf8View`. Parquet files generated by some legacy writers do not correctly set the UTF8 flag for strings, causing string columns to be loaded as BLOB instead. | | datafusion.execution.parquet.data_pagesize_limit | 1048576 | (writing) Sets best effort maximum size of data page in bytes | | datafusion.execution.parquet.write_batch_size | 1024 | (writing) Sets write_batch_size in bytes | @@ -67,7 +67,7 @@ Environment variables are read during `SessionConfig` initialisation so they mus | datafusion.execution.parquet.statistics_enabled | page | (writing) Sets if statistics are enabled for any column Valid values are: "none", "chunk", and "page" These values are not case sensitive. 
If NULL, uses default parquet writer setting | | datafusion.execution.parquet.max_statistics_size | 4096 | (writing) Sets max statistics size for any column. If NULL, uses default parquet writer setting | | datafusion.execution.parquet.max_row_group_size | 1048576 | (writing) Target maximum number of rows in each row group (defaults to 1M rows). Writing larger row groups requires more memory to write, but can get better compression and be faster to read. | -| datafusion.execution.parquet.created_by | datafusion version 42.1.0 | (writing) Sets "created by" property | +| datafusion.execution.parquet.created_by | datafusion version 42.2.0 | (writing) Sets "created by" property | | datafusion.execution.parquet.column_index_truncate_length | 64 | (writing) Sets column index truncate length | | datafusion.execution.parquet.data_page_row_count_limit | 20000 | (writing) Sets best effort maximum number of rows in data page | | datafusion.execution.parquet.encoding | NULL | (writing) Sets default encoding for any column. Valid values are: plain, plain_dictionary, rle, bit_packed, delta_binary_packed, delta_length_byte_array, delta_byte_array, rle_dictionary, and byte_stream_split. These values are not case sensitive. If NULL, uses default parquet writer setting | @@ -79,6 +79,7 @@ Environment variables are read during `SessionConfig` initialisation so they mus | datafusion.execution.parquet.maximum_parallel_row_group_writers | 1 | (writing) By default parallel parquet writer is tuned for minimum memory usage in a streaming execution plan. You may see a performance benefit when writing large parquet files by increasing maximum_parallel_row_group_writers and maximum_buffered_record_batches_per_stream if your system has idle cores and can tolerate additional memory usage. Boosting these values is likely worthwhile when writing out already in-memory data, such as from a cached data frame. 
| | datafusion.execution.parquet.maximum_buffered_record_batches_per_stream | 2 | (writing) By default parallel parquet writer is tuned for minimum memory usage in a streaming execution plan. You may see a performance benefit when writing large parquet files by increasing maximum_parallel_row_group_writers and maximum_buffered_record_batches_per_stream if your system has idle cores and can tolerate additional memory usage. Boosting these values is likely worthwhile when writing out already in-memory data, such as from a cached data frame. | | datafusion.execution.planning_concurrency | 0 | Fan-out during initial physical planning. This is mostly use to plan `UNION` children in parallel. Defaults to the number of CPU cores on the system | +| datafusion.execution.skip_physical_aggregate_schema_check | false | When set to true, skips verifying that the schema produced by planning the input of `LogicalPlan::Aggregate` exactly matches the schema of the input plan. When set to false, if the schema does not match exactly (including nullability and metadata), a planning error will be raised. This is used to workaround bugs in the planner that are now caught by the new schema verification step. | | datafusion.execution.sort_spill_reservation_bytes | 10485760 | Specifies the reserved memory for each spillable sort operation to facilitate an in-memory merge. When a sort operation spills to disk, the in-memory data must be sorted and merged before being written to a file. This setting reserves a specific amount of memory for that in-memory sort/merge process. Note: This setting is irrelevant if the sort operation cannot spill (i.e., if there's no `DiskManager` configured). | | datafusion.execution.sort_in_place_threshold_bytes | 1048576 | When sorting, below what size should data be concatenated and sorted in a single RecordBatch rather than sorted in batches and merged. 
| | datafusion.execution.meta_fetch_concurrency | 32 | Number of files to read in parallel when inferring schema and statistics |