From f2e8ca063b08a7bb7cbba262be17e124e63a466b Mon Sep 17 00:00:00 2001 From: Michael-J-Ward Date: Mon, 28 Oct 2024 13:37:36 -0500 Subject: [PATCH 01/19] patch datafusion deps --- Cargo.lock | 241 ++++++++++++++++++++++++++--------------------------- Cargo.toml | 8 +- 2 files changed, 125 insertions(+), 124 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 0835f219..ab2e2b2e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -130,9 +130,9 @@ checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" [[package]] name = "arrow" -version = "53.1.0" +version = "53.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a9ba0d7248932f4e2a12fb37f0a2e3ec82b3bdedbac2a1dce186e036843b8f8c" +checksum = "4caf25cdc4a985f91df42ed9e9308e1adbcd341a31a72605c697033fcef163e3" dependencies = [ "arrow-arith", "arrow-array", @@ -152,9 +152,9 @@ dependencies = [ [[package]] name = "arrow-arith" -version = "53.1.0" +version = "53.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d60afcdc004841a5c8d8da4f4fa22d64eb19c0c01ef4bcedd77f175a7cf6e38f" +checksum = "91f2dfd1a7ec0aca967dfaa616096aec49779adc8eccec005e2f5e4111b1192a" dependencies = [ "arrow-array", "arrow-buffer", @@ -167,9 +167,9 @@ dependencies = [ [[package]] name = "arrow-array" -version = "53.1.0" +version = "53.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f16835e8599dbbb1659fd869d865254c4cf32c6c2bb60b6942ac9fc36bfa5da" +checksum = "d39387ca628be747394890a6e47f138ceac1aa912eab64f02519fed24b637af8" dependencies = [ "ahash", "arrow-buffer", @@ -184,9 +184,9 @@ dependencies = [ [[package]] name = "arrow-buffer" -version = "53.1.0" +version = "53.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a1f34f0faae77da6b142db61deba2cb6d60167592b178be317b341440acba80" +checksum = "9e51e05228852ffe3eb391ce7178a0f97d2cf80cc6ef91d3c4a6b3cb688049ec" dependencies = [ "bytes", "half", @@ -195,9 +195,9 
@@ dependencies = [ [[package]] name = "arrow-cast" -version = "53.1.0" +version = "53.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "450e4abb5775bca0740bec0bcf1b1a5ae07eff43bd625661c4436d8e8e4540c4" +checksum = "d09aea56ec9fa267f3f3f6cdab67d8a9974cbba90b3aa38c8fe9d0bb071bd8c1" dependencies = [ "arrow-array", "arrow-buffer", @@ -216,9 +216,9 @@ dependencies = [ [[package]] name = "arrow-csv" -version = "53.1.0" +version = "53.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d3a4e4d63830a341713e35d9a42452fbc6241d5f42fa5cf6a4681b8ad91370c4" +checksum = "c07b5232be87d115fde73e32f2ca7f1b353bff1b44ac422d3c6fc6ae38f11f0d" dependencies = [ "arrow-array", "arrow-buffer", @@ -235,9 +235,9 @@ dependencies = [ [[package]] name = "arrow-data" -version = "53.1.0" +version = "53.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b1e618bbf714c7a9e8d97203c806734f012ff71ae3adc8ad1b075689f540634" +checksum = "b98ae0af50890b494cebd7d6b04b35e896205c1d1df7b29a6272c5d0d0249ef5" dependencies = [ "arrow-buffer", "arrow-schema", @@ -247,9 +247,9 @@ dependencies = [ [[package]] name = "arrow-ipc" -version = "53.1.0" +version = "53.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f98e983549259a2b97049af7edfb8f28b8911682040e99a94e4ceb1196bd65c2" +checksum = "0ed91bdeaff5a1c00d28d8f73466bcb64d32bbd7093b5a30156b4b9f4dba3eee" dependencies = [ "arrow-array", "arrow-buffer", @@ -262,9 +262,9 @@ dependencies = [ [[package]] name = "arrow-json" -version = "53.1.0" +version = "53.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b198b9c6fcf086501730efbbcb483317b39330a116125af7bb06467d04b352a3" +checksum = "0471f51260a5309307e5d409c9dc70aede1cd9cf1d4ff0f0a1e8e1a2dd0e0d3c" dependencies = [ "arrow-array", "arrow-buffer", @@ -282,9 +282,9 @@ dependencies = [ [[package]] name = "arrow-ord" -version = "53.1.0" +version = "53.2.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "2427f37b4459a4b9e533045abe87a5183a5e0995a3fc2c2fd45027ae2cc4ef3f" +checksum = "2883d7035e0b600fb4c30ce1e50e66e53d8656aa729f2bfa4b51d359cf3ded52" dependencies = [ "arrow-array", "arrow-buffer", @@ -297,9 +297,9 @@ dependencies = [ [[package]] name = "arrow-row" -version = "53.1.0" +version = "53.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "15959657d92e2261a7a323517640af87f5afd9fd8a6492e424ebee2203c567f6" +checksum = "552907e8e587a6fde4f8843fd7a27a576a260f65dab6c065741ea79f633fc5be" dependencies = [ "ahash", "arrow-array", @@ -311,18 +311,18 @@ dependencies = [ [[package]] name = "arrow-schema" -version = "53.1.0" +version = "53.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fbf0388a18fd7f7f3fe3de01852d30f54ed5182f9004db700fbe3ba843ed2794" +checksum = "539ada65246b949bd99ffa0881a9a15a4a529448af1a07a9838dd78617dafab1" dependencies = [ "bitflags 2.6.0", ] [[package]] name = "arrow-select" -version = "53.1.0" +version = "53.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b83e5723d307a38bf00ecd2972cd078d1339c7fd3eb044f609958a9a24463f3a" +checksum = "6259e566b752da6dceab91766ed8b2e67bf6270eb9ad8a6e07a33c1bede2b125" dependencies = [ "ahash", "arrow-array", @@ -334,9 +334,9 @@ dependencies = [ [[package]] name = "arrow-string" -version = "53.1.0" +version = "53.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7ab3db7c09dd826e74079661d84ed01ed06547cf75d52c2818ef776d0d852305" +checksum = "f3179ccbd18ebf04277a095ba7321b93fd1f774f18816bd5f6b3ce2f594edb6c" dependencies = [ "arrow-array", "arrow-buffer", @@ -482,9 +482,9 @@ dependencies = [ [[package]] name = "brotli" -version = "6.0.0" +version = "7.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "74f7971dbd9326d58187408ab83117d8ac1bb9c17b085fdacd1cf2f598719b6b" +checksum = 
"cc97b8f16f944bba54f0433f07e30be199b6dc2bd25937444bbad560bcea29bd" dependencies = [ "alloc-no-stdlib", "alloc-stdlib", @@ -745,9 +745,8 @@ dependencies = [ [[package]] name = "datafusion" -version = "42.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ee907b081e45e1d14e1f327e89ef134f91fcebad0bfc2dc229fa9f6044379682" +version = "42.1.0" +source = "git+https://github.com/apache/datafusion.git?branch=main#1fd6116dd9e1898540b4fbdbba735c4ebacc4227" dependencies = [ "ahash", "apache-avro", @@ -804,9 +803,8 @@ dependencies = [ [[package]] name = "datafusion-catalog" -version = "42.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c2b914f6e33c429af7d8696c72a47ed9225d7e2b82c747ebdfa2408ed53579f" +version = "42.1.0" +source = "git+https://github.com/apache/datafusion.git?branch=main#1fd6116dd9e1898540b4fbdbba735c4ebacc4227" dependencies = [ "arrow-schema", "async-trait", @@ -819,9 +817,8 @@ dependencies = [ [[package]] name = "datafusion-common" -version = "42.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3a84f8e76330c582a6b8ada0b2c599ca46cfe46b7585e458fc3f4092bc722a18" +version = "42.1.0" +source = "git+https://github.com/apache/datafusion.git?branch=main#1fd6116dd9e1898540b4fbdbba735c4ebacc4227" dependencies = [ "ahash", "apache-avro", @@ -832,6 +829,7 @@ dependencies = [ "chrono", "half", "hashbrown 0.14.5", + "indexmap", "instant", "libc", "num_cpus", @@ -845,9 +843,8 @@ dependencies = [ [[package]] name = "datafusion-common-runtime" -version = "42.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cf08cc30d92720d557df13bd5a5696213bd5ea0f38a866d8d85055d866fba774" +version = "42.1.0" +source = "git+https://github.com/apache/datafusion.git?branch=main#1fd6116dd9e1898540b4fbdbba735c4ebacc4227" dependencies = [ "log", "tokio", @@ -855,9 +852,8 @@ dependencies = [ [[package]] name = "datafusion-execution" -version = "42.0.0" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "86bc4183d5c45b9f068a6f351678a0d1eb1225181424542bb75db18ec280b822" +version = "42.1.0" +source = "git+https://github.com/apache/datafusion.git?branch=main#1fd6116dd9e1898540b4fbdbba735c4ebacc4227" dependencies = [ "arrow", "chrono", @@ -876,9 +872,8 @@ dependencies = [ [[package]] name = "datafusion-expr" -version = "42.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "202119ce58e4d103e37ae64aab40d4e574c97bdd2bea994bf307b175fcbfa74d" +version = "42.1.0" +source = "git+https://github.com/apache/datafusion.git?branch=main#1fd6116dd9e1898540b4fbdbba735c4ebacc4227" dependencies = [ "ahash", "arrow", @@ -888,7 +883,9 @@ dependencies = [ "datafusion-common", "datafusion-expr-common", "datafusion-functions-aggregate-common", + "datafusion-functions-window-common", "datafusion-physical-expr-common", + "indexmap", "paste", "serde_json", "sqlparser", @@ -898,20 +895,19 @@ dependencies = [ [[package]] name = "datafusion-expr-common" -version = "42.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f8b181ce8569216abb01ef3294aa16c0a40d7d39350c2ff01ede00f167a535f2" +version = "42.1.0" +source = "git+https://github.com/apache/datafusion.git?branch=main#1fd6116dd9e1898540b4fbdbba735c4ebacc4227" dependencies = [ "arrow", "datafusion-common", + "itertools", "paste", ] [[package]] name = "datafusion-functions" -version = "42.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e4124b8066444e05a24472f852e94cf56546c0f4d92d00f018f207216902712" +version = "42.1.0" +source = "git+https://github.com/apache/datafusion.git?branch=main#1fd6116dd9e1898540b4fbdbba735c4ebacc4227" dependencies = [ "arrow", "arrow-buffer", @@ -936,9 +932,8 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate" -version = "42.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"b94acdac235ea21810150a89751617ef2db7e32eba27f54be48a81bde2bfe119" +version = "42.1.0" +source = "git+https://github.com/apache/datafusion.git?branch=main#1fd6116dd9e1898540b4fbdbba735c4ebacc4227" dependencies = [ "ahash", "arrow", @@ -950,16 +945,15 @@ dependencies = [ "datafusion-physical-expr", "datafusion-physical-expr-common", "half", + "indexmap", "log", "paste", - "sqlparser", ] [[package]] name = "datafusion-functions-aggregate-common" -version = "42.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c9ea085bbf900bf16e2ca0f56fc56236b2e4f2e1a2cccb67bcd83c5ab4ad0ef" +version = "42.1.0" +source = "git+https://github.com/apache/datafusion.git?branch=main#1fd6116dd9e1898540b4fbdbba735c4ebacc4227" dependencies = [ "ahash", "arrow", @@ -971,9 +965,8 @@ dependencies = [ [[package]] name = "datafusion-functions-nested" -version = "42.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c882e61665ed60c5ce9b061c1e587aeb8ae5ae4bcb5e5f2465139ab25328e0f" +version = "42.1.0" +source = "git+https://github.com/apache/datafusion.git?branch=main#1fd6116dd9e1898540b4fbdbba735c4ebacc4227" dependencies = [ "arrow", "arrow-array", @@ -994,21 +987,31 @@ dependencies = [ [[package]] name = "datafusion-functions-window" -version = "42.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "98a354ce96df3ca6d025093adac9fd55ca09931c9b6f2630140721a95873fde4" +version = "42.1.0" +source = "git+https://github.com/apache/datafusion.git?branch=main#1fd6116dd9e1898540b4fbdbba735c4ebacc4227" dependencies = [ "datafusion-common", "datafusion-expr", + "datafusion-functions-window-common", + "datafusion-physical-expr", "datafusion-physical-expr-common", "log", + "paste", +] + +[[package]] +name = "datafusion-functions-window-common" +version = "42.1.0" +source = "git+https://github.com/apache/datafusion.git?branch=main#1fd6116dd9e1898540b4fbdbba735c4ebacc4227" +dependencies = [ + "datafusion-common", + 
"datafusion-physical-expr-common", ] [[package]] name = "datafusion-optimizer" -version = "42.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "baf677c74fb7b5a1899ef52709e4a70fff3ed80bdfb4bbe495909810e83d5f39" +version = "42.1.0" +source = "git+https://github.com/apache/datafusion.git?branch=main#1fd6116dd9e1898540b4fbdbba735c4ebacc4227" dependencies = [ "arrow", "async-trait", @@ -1026,9 +1029,8 @@ dependencies = [ [[package]] name = "datafusion-physical-expr" -version = "42.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "30b077999f6eb6c43d6b25bc66332a3be2f693c382840f008dd763b8540f9530" +version = "42.1.0" +source = "git+https://github.com/apache/datafusion.git?branch=main#1fd6116dd9e1898540b4fbdbba735c4ebacc4227" dependencies = [ "ahash", "arrow", @@ -1037,30 +1039,25 @@ dependencies = [ "arrow-ord", "arrow-schema", "arrow-string", - "base64 0.22.1", "chrono", "datafusion-common", - "datafusion-execution", "datafusion-expr", "datafusion-expr-common", "datafusion-functions-aggregate-common", "datafusion-physical-expr-common", "half", "hashbrown 0.14.5", - "hex", "indexmap", "itertools", "log", "paste", "petgraph", - "regex", ] [[package]] name = "datafusion-physical-expr-common" -version = "42.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dce847f885c2b13bbe29f5c8b7948797131aa470af6e16d2a94f4428b4f4f1bd" +version = "42.1.0" +source = "git+https://github.com/apache/datafusion.git?branch=main#1fd6116dd9e1898540b4fbdbba735c4ebacc4227" dependencies = [ "ahash", "arrow", @@ -1072,13 +1069,14 @@ dependencies = [ [[package]] name = "datafusion-physical-optimizer" -version = "42.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d13238e3b9fdd62a4c18760bfef714bb990d1e1d3430e9f416aae4b3cfaa71af" +version = "42.1.0" +source = "git+https://github.com/apache/datafusion.git?branch=main#1fd6116dd9e1898540b4fbdbba735c4ebacc4227" dependencies 
= [ + "arrow", "arrow-schema", "datafusion-common", "datafusion-execution", + "datafusion-expr-common", "datafusion-physical-expr", "datafusion-physical-plan", "itertools", @@ -1086,9 +1084,8 @@ dependencies = [ [[package]] name = "datafusion-physical-plan" -version = "42.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "faba6f55a7eaf0241d07d12c2640de52742646b10f754485d5192bdfe2c9ceae" +version = "42.1.0" +source = "git+https://github.com/apache/datafusion.git?branch=main#1fd6116dd9e1898540b4fbdbba735c4ebacc4227" dependencies = [ "ahash", "arrow", @@ -1102,8 +1099,8 @@ dependencies = [ "datafusion-common-runtime", "datafusion-execution", "datafusion-expr", - "datafusion-functions-aggregate", "datafusion-functions-aggregate-common", + "datafusion-functions-window-common", "datafusion-physical-expr", "datafusion-physical-expr-common", "futures", @@ -1121,9 +1118,8 @@ dependencies = [ [[package]] name = "datafusion-proto" -version = "42.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "585357d621fa03ea85a7fefca79ebc5ef0ee13a7f82be0762a414879a4d190a7" +version = "42.1.0" +source = "git+https://github.com/apache/datafusion.git?branch=main#1fd6116dd9e1898540b4fbdbba735c4ebacc4227" dependencies = [ "arrow", "chrono", @@ -1137,9 +1133,8 @@ dependencies = [ [[package]] name = "datafusion-proto-common" -version = "42.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4db6534382f92f528bdb5d925b4214c31ffd84fa7fe1eff3ed0d2f1286851ab8" +version = "42.1.0" +source = "git+https://github.com/apache/datafusion.git?branch=main#1fd6116dd9e1898540b4fbdbba735c4ebacc4227" dependencies = [ "arrow", "chrono", @@ -1171,15 +1166,15 @@ dependencies = [ [[package]] name = "datafusion-sql" -version = "42.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dad8d96a9b52e1aa24f9373696a815be828193efce7cb0bbd2140b6bb67d1819" +version = "42.1.0" +source = 
"git+https://github.com/apache/datafusion.git?branch=main#1fd6116dd9e1898540b4fbdbba735c4ebacc4227" dependencies = [ "arrow", "arrow-array", "arrow-schema", "datafusion-common", "datafusion-expr", + "indexmap", "log", "regex", "sqlparser", @@ -1188,9 +1183,8 @@ dependencies = [ [[package]] name = "datafusion-substrait" -version = "42.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f92b1b80e98bf5a9921bf118816e0e766d18527e343153321fcccfe4d68c5c45" +version = "42.1.0" +source = "git+https://github.com/apache/datafusion.git?branch=main#1fd6116dd9e1898540b4fbdbba735c4ebacc4227" dependencies = [ "arrow-buffer", "async-recursion", @@ -2086,9 +2080,9 @@ dependencies = [ [[package]] name = "parquet" -version = "53.1.0" +version = "53.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "310c46a70a3ba90d98fec39fa2da6d9d731e544191da6fb56c9d199484d0dd3e" +checksum = "dea02606ba6f5e856561d8d507dba8bac060aefca2a6c0f1aa1d361fed91ff3e" dependencies = [ "ahash", "arrow-array", @@ -2261,9 +2255,9 @@ dependencies = [ [[package]] name = "prettyplease" -version = "0.2.22" +version = "0.2.24" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "479cf940fbbb3426c32c5d5176f62ad57549a0bb84773423ba8be9d089f5faba" +checksum = "910d41a655dac3b764f1ade94821093d3610248694320cd072303a8eedcf221d" dependencies = [ "proc-macro2", "syn", @@ -2552,9 +2546,9 @@ checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" [[package]] name = "regress" -version = "0.9.1" +version = "0.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0eae2a1ebfecc58aff952ef8ccd364329abe627762f5bf09ff42eb9d98522479" +checksum = "1541daf4e4ed43a0922b7969bdc2170178bcacc5dabf7e39bc508a9fa3953a7a" dependencies = [ "hashbrown 0.14.5", "memchr", @@ -2813,18 +2807,18 @@ checksum = "a3f0bf26fd526d2a95683cd0f87bf103b8539e2ca1ef48ce002d67aad59aa0b4" [[package]] name = "serde" -version = 
"1.0.210" +version = "1.0.213" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c8e3592472072e6e22e0a54d5904d9febf8508f65fb8552499a1abc7d1078c3a" +checksum = "3ea7893ff5e2466df8d720bb615088341b295f849602c6956047f8f80f0e9bc1" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.210" +version = "1.0.213" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "243902eda00fad750862fc144cea25caca5e20d615af0a81bee94ca738f1df1f" +checksum = "7e85ad2009c50b58e87caa8cd6dac16bdf511bbfb7af6c33df902396aa480fa5" dependencies = [ "proc-macro2", "quote", @@ -2844,9 +2838,9 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.128" +version = "1.0.132" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ff5456707a1de34e7e37f2a6fd3d3f808c318259cbd01ab6377795054b483d8" +checksum = "d726bfaff4b320266d395898905d0eba0345aae23b54aee3a737e260fd46db03" dependencies = [ "itoa", "memchr", @@ -2974,9 +2968,9 @@ checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" [[package]] name = "sqlparser" -version = "0.50.0" +version = "0.51.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b2e5b515a2bd5168426033e9efbfd05500114833916f1d5c268f938b4ee130ac" +checksum = "5fe11944a61da0da3f592e19a45ebe5ab92dc14a779907ff1f08fbb797bfefc7" dependencies = [ "log", "sqlparser_derive", @@ -3042,9 +3036,9 @@ dependencies = [ [[package]] name = "substrait" -version = "0.41.9" +version = "0.45.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2a3bf05f1d7a3fd7a97790d410f6e859b3a98dcde05e7a3fc00b31b0f60fe7cb" +checksum = "28df2e4409ff5fb420f96573e76414a225b773f1364bfc9675b8daffe19ca571" dependencies = [ "heck 0.5.0", "pbjson", @@ -3055,6 +3049,7 @@ dependencies = [ "prost-build", "prost-types", "protobuf-src", + "regress", "schemars", "semver", "serde", @@ -3073,9 +3068,9 @@ checksum = 
"13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292" [[package]] name = "syn" -version = "2.0.79" +version = "2.0.82" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "89132cd0bf050864e1d38dc3bbc07a0eb8e7530af26344d3d2bbbef83499f590" +checksum = "83540f837a8afc019423a8edb95b52a8effe46957ee402287f4292fae35be021" dependencies = [ "proc-macro2", "quote", @@ -3297,9 +3292,9 @@ checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825" [[package]] name = "typify" -version = "0.1.0" +version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "adb6beec125971dda80a086f90b4a70f60f222990ce4d63ad0fc140492f53444" +checksum = "b4c644dda9862f0fef3a570d8ddb3c2cfb1d5ac824a1f2ddfa7bc8f071a5ad8a" dependencies = [ "typify-impl", "typify-macro", @@ -3307,9 +3302,9 @@ dependencies = [ [[package]] name = "typify-impl" -version = "0.1.0" +version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "93bbb24e990654aff858d80fee8114f4322f7d7a1b1ecb45129e2fcb0d0ad5ae" +checksum = "d59ab345b6c0d8ae9500b9ff334a4c7c0d316c1c628dc55726b95887eb8dbd11" dependencies = [ "heck 0.5.0", "log", @@ -3327,9 +3322,9 @@ dependencies = [ [[package]] name = "typify-macro" -version = "0.1.0" +version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f8e6491896e955692d68361c68db2b263e3bec317ec0b684e0e2fa882fb6e31e" +checksum = "785e2cdcef0df8160fdd762ed548a637aaec1e83704fdbc14da0df66013ee8d0" dependencies = [ "proc-macro2", "quote", diff --git a/Cargo.toml b/Cargo.toml index 073f82cf..6766615c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -48,6 +48,12 @@ futures = "0.3" object_store = { version = "0.11.0", features = ["aws", "gcp", "azure", "http"] } url = "2" +[patch.crates-io] +datafusion = { git = "https://github.com/apache/datafusion.git", branch = "main" } +datafusion-substrait = { git = "https://github.com/apache/datafusion.git", branch = 
"main" } +datafusion-proto = { git = "https://github.com/apache/datafusion.git", branch = "main" } + + [build-dependencies] prost-types = "0.13" # keep in line with `datafusion-substrait` pyo3-build-config = "0.22" @@ -58,4 +64,4 @@ crate-type = ["cdylib", "rlib"] [profile.release] lto = true -codegen-units = 1 +codegen-units = 1 \ No newline at end of file From e14cca2cd0bef4ae82d19d835dde5fa786132dab Mon Sep 17 00:00:00 2001 From: Michael-J-Ward Date: Mon, 14 Oct 2024 16:25:31 -0500 Subject: [PATCH 02/19] migrate from deprecated RuntimeEnv::new to RuntimeEnv::try_new Ref: https://github.com/apache/datafusion/pull/12566 --- src/context.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/context.rs b/src/context.rs index f445874d..c2a263fa 100644 --- a/src/context.rs +++ b/src/context.rs @@ -287,7 +287,7 @@ impl PySessionContext { } else { RuntimeConfig::default() }; - let runtime = Arc::new(RuntimeEnv::new(runtime_config)?); + let runtime = Arc::new(RuntimeEnv::try_new(runtime_config)?); let session_state = SessionStateBuilder::new() .with_config(config) .with_runtime_env(runtime) From 2fd2b838713cf75498766cc338a3f7163cace74e Mon Sep 17 00:00:00 2001 From: Michael-J-Ward Date: Mon, 14 Oct 2024 16:34:42 -0500 Subject: [PATCH 03/19] remove Arc from create_udf call Ref: https://github.com/apache/datafusion/pull/12489 --- src/udf.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/udf.rs b/src/udf.rs index ec8efb16..ea56930e 100644 --- a/src/udf.rs +++ b/src/udf.rs @@ -97,7 +97,7 @@ impl PyScalarUDF { let function = create_udf( name, input_types.0, - Arc::new(return_type.0), + return_type.0, parse_volatility(volatility)?, to_scalar_function_impl(func), ); From b099d392451d36842906d90e6dba30a7275c01d5 Mon Sep 17 00:00:00 2001 From: Michael-J-Ward Date: Mon, 14 Oct 2024 16:35:18 -0500 Subject: [PATCH 04/19] doc typo --- src/udf.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/udf.rs b/src/udf.rs index 
ea56930e..4570e77a 100644 --- a/src/udf.rs +++ b/src/udf.rs @@ -31,7 +31,7 @@ use datafusion::logical_expr::{create_udf, ColumnarValue}; use crate::expr::PyExpr; use crate::utils::parse_volatility; -/// Create a Rust callable function fr a python function that expects pyarrow arrays +/// Create a Rust callable function from a python function that expects pyarrow arrays fn pyarrow_function_to_rust( func: PyObject, ) -> impl Fn(&[ArrayRef]) -> Result { From b851136f631e1e984bccd9880f1eb7a0710dcad7 Mon Sep 17 00:00:00 2001 From: Michael-J-Ward Date: Mon, 28 Oct 2024 13:44:08 -0500 Subject: [PATCH 05/19] migrate to new UnnestOptions API Ref: https://github.com/apache/datafusion/pull/12836/files --- src/dataframe.rs | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/dataframe.rs b/src/dataframe.rs index dd5d89ce..b7148b57 100644 --- a/src/dataframe.rs +++ b/src/dataframe.rs @@ -409,7 +409,9 @@ impl PyDataFrame { #[pyo3(signature = (column, preserve_nulls=true))] fn unnest_column(&self, column: &str, preserve_nulls: bool) -> PyResult { - let unnest_options = UnnestOptions { preserve_nulls }; + // TODO: expose RecursionUnnestOptions + // REF: https://github.com/apache/datafusion/pull/11577 + let unnest_options = UnnestOptions::default().with_preserve_nulls(preserve_nulls); let df = self .df .as_ref() @@ -420,7 +422,9 @@ impl PyDataFrame { #[pyo3(signature = (columns, preserve_nulls=true))] fn unnest_columns(&self, columns: Vec, preserve_nulls: bool) -> PyResult { - let unnest_options = UnnestOptions { preserve_nulls }; + // TODO: expose RecursionUnnestOptions + // REF: https://github.com/apache/datafusion/pull/11577 + let unnest_options = UnnestOptions::default().with_preserve_nulls(preserve_nulls); let cols = columns.iter().map(|s| s.as_ref()).collect::>(); let df = self .df From 4cf01790eebc63fe2745f7ab1f16801f2e604aa7 Mon Sep 17 00:00:00 2001 From: Michael-J-Ward Date: Mon, 28 Oct 2024 13:51:51 -0500 Subject: [PATCH 06/19] update API for logical 
expr Limit Ref: https://github.com/apache/datafusion/pull/12836 --- python/tests/test_expr.py | 8 ++++++-- src/expr/limit.rs | 22 +++++++++++++--------- 2 files changed, 19 insertions(+), 11 deletions(-) diff --git a/python/tests/test_expr.py b/python/tests/test_expr.py index 1847edef..d81e04c8 100644 --- a/python/tests/test_expr.py +++ b/python/tests/test_expr.py @@ -85,14 +85,18 @@ def test_limit(test_ctx): plan = plan.to_variant() assert isinstance(plan, Limit) - assert plan.skip() == 0 + # TODO: Upstream now has expressions for skip and fetch + # REF: https://github.com/apache/datafusion/pull/12836 + # assert plan.skip() == 0 df = test_ctx.sql("select c1 from test LIMIT 10 OFFSET 5") plan = df.logical_plan() plan = plan.to_variant() assert isinstance(plan, Limit) - assert plan.skip() == 5 + # TODO: Upstream now has expressions for skip and fetch + # REF: https://github.com/apache/datafusion/pull/12836 + # assert plan.skip() == 5 def test_aggregate_query(test_ctx): diff --git a/src/expr/limit.rs b/src/expr/limit.rs index 876e154c..84ad7d68 100644 --- a/src/expr/limit.rs +++ b/src/expr/limit.rs @@ -46,7 +46,7 @@ impl Display for PyLimit { write!( f, "Limit - Skip: {} + Skip: {:?} Fetch: {:?} Input: {:?}", &self.limit.skip, &self.limit.fetch, &self.limit.input @@ -56,15 +56,19 @@ impl Display for PyLimit { #[pymethods] impl PyLimit { - /// Retrieves the skip value for this `Limit` - fn skip(&self) -> usize { - self.limit.skip - } + // NOTE: Upstream now has expressions for skip and fetch + // TODO: Do we still want to expose these? 
+ // REF: https://github.com/apache/datafusion/pull/12836 - /// Retrieves the fetch value for this `Limit` - fn fetch(&self) -> Option { - self.limit.fetch - } + // /// Retrieves the skip value for this `Limit` + // fn skip(&self) -> usize { + // self.limit.skip + // } + + // /// Retrieves the fetch value for this `Limit` + // fn fetch(&self) -> Option { + // self.limit.fetch + // } /// Retrieves the input `LogicalPlan` to this `Limit` node fn input(&self) -> PyResult> { From 078dca8d590e74417ce8bb1d6cf58eebedf5bafe Mon Sep 17 00:00:00 2001 From: Michael-J-Ward Date: Mon, 28 Oct 2024 13:55:10 -0500 Subject: [PATCH 07/19] remove logical expr CrossJoin It was removed upstream. Ref: https://github.com/apache/datafusion/pull/13076 --- python/datafusion/expr.py | 2 - python/tests/test_imports.py | 2 - src/expr.rs | 2 - src/expr/cross_join.rs | 94 ------------------------------------ src/sql/logical.rs | 2 - 5 files changed, 102 deletions(-) delete mode 100644 src/expr/cross_join.rs diff --git a/python/datafusion/expr.py b/python/datafusion/expr.py index c4e7713f..5c14362a 100644 --- a/python/datafusion/expr.py +++ b/python/datafusion/expr.py @@ -51,7 +51,6 @@ Column = expr_internal.Column CreateMemoryTable = expr_internal.CreateMemoryTable CreateView = expr_internal.CreateView -CrossJoin = expr_internal.CrossJoin Distinct = expr_internal.Distinct DropTable = expr_internal.DropTable EmptyRelation = expr_internal.EmptyRelation @@ -140,7 +139,6 @@ "Join", "JoinType", "JoinConstraint", - "CrossJoin", "Union", "Unnest", "UnnestExpr", diff --git a/python/tests/test_imports.py b/python/tests/test_imports.py index 3d324fb6..6ea77b15 100644 --- a/python/tests/test_imports.py +++ b/python/tests/test_imports.py @@ -46,7 +46,6 @@ Join, JoinType, JoinConstraint, - CrossJoin, Union, Like, ILike, @@ -129,7 +128,6 @@ def test_class_module_is_datafusion(): Join, JoinType, JoinConstraint, - CrossJoin, Union, Like, ILike, diff --git a/src/expr.rs b/src/expr.rs index 49fa4b84..bca0cd3f 
100644 --- a/src/expr.rs +++ b/src/expr.rs @@ -65,7 +65,6 @@ pub mod column; pub mod conditional_expr; pub mod create_memory_table; pub mod create_view; -pub mod cross_join; pub mod distinct; pub mod drop_table; pub mod empty_relation; @@ -775,7 +774,6 @@ pub(crate) fn init_module(m: &Bound<'_, PyModule>) -> PyResult<()> { m.add_class::()?; m.add_class::()?; m.add_class::()?; - m.add_class::()?; m.add_class::()?; m.add_class::()?; m.add_class::()?; diff --git a/src/expr/cross_join.rs b/src/expr/cross_join.rs deleted file mode 100644 index 5bc202aa..00000000 --- a/src/expr/cross_join.rs +++ /dev/null @@ -1,94 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -use datafusion::logical_expr::logical_plan::CrossJoin; -use pyo3::prelude::*; -use std::fmt::{self, Display, Formatter}; - -use super::logical_node::LogicalNode; -use crate::common::df_schema::PyDFSchema; -use crate::sql::logical::PyLogicalPlan; - -#[pyclass(name = "CrossJoin", module = "datafusion.expr", subclass)] -#[derive(Clone)] -pub struct PyCrossJoin { - cross_join: CrossJoin, -} - -impl From for PyCrossJoin { - fn from(cross_join: CrossJoin) -> PyCrossJoin { - PyCrossJoin { cross_join } - } -} - -impl From for CrossJoin { - fn from(cross_join: PyCrossJoin) -> Self { - cross_join.cross_join - } -} - -impl Display for PyCrossJoin { - fn fmt(&self, f: &mut Formatter) -> fmt::Result { - write!( - f, - "CrossJoin - \nLeft: {:?} - \nRight: {:?} - \nSchema: {:?}", - &self.cross_join.left, &self.cross_join.right, &self.cross_join.schema - ) - } -} - -#[pymethods] -impl PyCrossJoin { - /// Retrieves the left input `LogicalPlan` to this `CrossJoin` node - fn left(&self) -> PyResult { - Ok(self.cross_join.left.as_ref().clone().into()) - } - - /// Retrieves the right input `LogicalPlan` to this `CrossJoin` node - fn right(&self) -> PyResult { - Ok(self.cross_join.right.as_ref().clone().into()) - } - - /// Resulting Schema for this `CrossJoin` node instance - fn schema(&self) -> PyResult { - Ok(self.cross_join.schema.as_ref().clone().into()) - } - - fn __repr__(&self) -> PyResult { - Ok(format!("CrossJoin({})", self)) - } - - fn __name__(&self) -> PyResult { - Ok("CrossJoin".to_string()) - } -} - -impl LogicalNode for PyCrossJoin { - fn inputs(&self) -> Vec { - vec![ - PyLogicalPlan::from((*self.cross_join.left).clone()), - PyLogicalPlan::from((*self.cross_join.right).clone()), - ] - } - - fn to_variant(&self, py: Python) -> PyResult { - Ok(self.clone().into_py(py)) - } -} diff --git a/src/sql/logical.rs b/src/sql/logical.rs index fc398ff8..8aeec326 100644 --- a/src/sql/logical.rs +++ b/src/sql/logical.rs @@ -19,7 +19,6 @@ use std::sync::Arc; use 
crate::expr::aggregate::PyAggregate; use crate::expr::analyze::PyAnalyze; -use crate::expr::cross_join::PyCrossJoin; use crate::expr::distinct::PyDistinct; use crate::expr::empty_relation::PyEmptyRelation; use crate::expr::explain::PyExplain; @@ -68,7 +67,6 @@ impl PyLogicalPlan { match self.plan.as_ref() { LogicalPlan::Aggregate(plan) => PyAggregate::from(plan.clone()).to_variant(py), LogicalPlan::Analyze(plan) => PyAnalyze::from(plan.clone()).to_variant(py), - LogicalPlan::CrossJoin(plan) => PyCrossJoin::from(plan.clone()).to_variant(py), LogicalPlan::Distinct(plan) => PyDistinct::from(plan.clone()).to_variant(py), LogicalPlan::EmptyRelation(plan) => PyEmptyRelation::from(plan.clone()).to_variant(py), LogicalPlan::Explain(plan) => PyExplain::from(plan.clone()).to_variant(py), From d43bb210626524f36bcd9331f10d71b702bee2cb Mon Sep 17 00:00:00 2001 From: Michael-J-Ward Date: Tue, 15 Oct 2024 12:43:11 -0500 Subject: [PATCH 08/19] update PyWindowUDF Ref: https://github.com/apache/datafusion/issues/12803 --- src/udwf.rs | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/src/udwf.rs b/src/udwf.rs index 43c21ec7..ce692de3 100644 --- a/src/udwf.rs +++ b/src/udwf.rs @@ -20,6 +20,7 @@ use std::ops::Range; use std::sync::Arc; use arrow::array::{make_array, Array, ArrayData, ArrayRef}; +use datafusion::logical_expr::function::{PartitionEvaluatorArgs, WindowUDFFieldArgs}; use datafusion::logical_expr::window_state::WindowAggState; use datafusion::scalar::ScalarValue; use pyo3::exceptions::PyValueError; @@ -299,11 +300,21 @@ impl WindowUDFImpl for MultiColumnWindowUDF { &self.signature } - fn return_type(&self, _arg_types: &[DataType]) -> Result { - Ok(self.return_type.clone()) + fn field(&self, field_args: WindowUDFFieldArgs) -> Result { + // TODO: Should nullable always be `true`? 
+ Ok(arrow::datatypes::Field::new( + field_args.name(), + self.return_type.clone(), + true, + )) } - fn partition_evaluator(&self) -> Result> { + // TODO: Enable passing partition_evaluator_args to python? + fn partition_evaluator( + &self, + _partition_evaluator_args: PartitionEvaluatorArgs, + ) -> Result> { + let _ = _partition_evaluator_args; (self.partition_evaluator_factory)() } } From 42b0961eee34fff258788495ba9a369523572290 Mon Sep 17 00:00:00 2001 From: Michael-J-Ward Date: Mon, 28 Oct 2024 14:06:36 -0500 Subject: [PATCH 09/19] migrate window functions lead and lag to udwf Ref: https://github.com/apache/datafusion/issues/12802 --- src/functions.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/functions.rs b/src/functions.rs index 4facb6cf..816a58f7 100644 --- a/src/functions.rs +++ b/src/functions.rs @@ -757,7 +757,7 @@ pub fn lead( partition_by: Option>, order_by: Option>, ) -> PyResult { - let window_fn = window_function::lead(arg.expr, Some(shift_offset), default_value); + let window_fn = datafusion::functions_window::expr_fn::lead(arg.expr, Some(shift_offset), default_value); add_builder_fns_to_window(window_fn, partition_by, None, order_by, None) } @@ -771,7 +771,7 @@ pub fn lag( partition_by: Option>, order_by: Option>, ) -> PyResult { - let window_fn = window_function::lag(arg.expr, Some(shift_offset), default_value); + let window_fn = datafusion::functions_window::expr_fn::lag(arg.expr, Some(shift_offset), default_value); add_builder_fns_to_window(window_fn, partition_by, None, order_by, None) } From 901d48eb7428faa01514a9b7102ee428888bb3f5 Mon Sep 17 00:00:00 2001 From: Michael-J-Ward Date: Mon, 28 Oct 2024 14:08:07 -0500 Subject: [PATCH 10/19] migrate window functions rank, dense_rank, and percent_rank to udwf Ref: https://github.com/apache/datafusion/issues/12648 --- src/functions.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/functions.rs b/src/functions.rs index 816a58f7..7bc227a1 
100644 --- a/src/functions.rs +++ b/src/functions.rs @@ -793,7 +793,7 @@ pub fn rank( partition_by: Option>, order_by: Option>, ) -> PyResult { - let window_fn = window_function::rank(); + let window_fn = datafusion::functions_window::expr_fn::rank(); add_builder_fns_to_window(window_fn, partition_by, None, order_by, None) } @@ -804,7 +804,7 @@ pub fn dense_rank( partition_by: Option>, order_by: Option>, ) -> PyResult { - let window_fn = window_function::dense_rank(); + let window_fn = datafusion::functions_window::expr_fn::dense_rank(); add_builder_fns_to_window(window_fn, partition_by, None, order_by, None) } @@ -815,7 +815,7 @@ pub fn percent_rank( partition_by: Option>, order_by: Option>, ) -> PyResult { - let window_fn = window_function::percent_rank(); + let window_fn = datafusion::functions_window::expr_fn::percent_rank(); add_builder_fns_to_window(window_fn, partition_by, None, order_by, None) } From fb7b32caec22ee2b2ca71d3983c94f0a100504df Mon Sep 17 00:00:00 2001 From: Michael-J-Ward Date: Mon, 28 Oct 2024 14:09:18 -0500 Subject: [PATCH 11/19] convert window function cume_dist to udwf Ref: https://github.com/apache/datafusion/issues/12695 --- src/functions.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/functions.rs b/src/functions.rs index 7bc227a1..a72eb811 100644 --- a/src/functions.rs +++ b/src/functions.rs @@ -826,7 +826,7 @@ pub fn cume_dist( partition_by: Option>, order_by: Option>, ) -> PyResult { - let window_fn = window_function::cume_dist(); + let window_fn = datafusion::functions_window::expr_fn::cume_dist(); add_builder_fns_to_window(window_fn, partition_by, None, order_by, None) } From 6be0016fc2c9d3d51597a2f8aa9a72b6f5fe763f Mon Sep 17 00:00:00 2001 From: Michael-J-Ward Date: Mon, 28 Oct 2024 14:09:58 -0500 Subject: [PATCH 12/19] convert window function ntile to udwf Ref: https://github.com/apache/datafusion/issues/12694 --- src/functions.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/src/functions.rs b/src/functions.rs index a72eb811..6817af7b 100644 --- a/src/functions.rs +++ b/src/functions.rs @@ -838,7 +838,7 @@ pub fn ntile( partition_by: Option>, order_by: Option>, ) -> PyResult { - let window_fn = window_function::ntile(arg.into()); + let window_fn = datafusion::functions_window::expr_fn::ntile(arg.into()); add_builder_fns_to_window(window_fn, partition_by, None, order_by, None) } From f7f518c74c51aaf2af864401dd0bdf2f10fa3848 Mon Sep 17 00:00:00 2001 From: Michael-J-Ward Date: Mon, 28 Oct 2024 14:13:48 -0500 Subject: [PATCH 13/19] clean up functions_window invocation --- src/functions.rs | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/functions.rs b/src/functions.rs index 6817af7b..b6dea80c 100644 --- a/src/functions.rs +++ b/src/functions.rs @@ -16,7 +16,6 @@ // under the License. use datafusion::functions_aggregate::all_default_aggregate_functions; -use datafusion::logical_expr::window_function; use datafusion::logical_expr::ExprFunctionExt; use datafusion::logical_expr::WindowFrame; use pyo3::{prelude::*, wrap_pyfunction}; @@ -33,6 +32,7 @@ use datafusion::common::{Column, ScalarValue, TableReference}; use datafusion::execution::FunctionRegistry; use datafusion::functions; use datafusion::functions_aggregate; +use datafusion::functions_window; use datafusion::logical_expr::expr::Alias; use datafusion::logical_expr::sqlparser::ast::NullTreatment as DFNullTreatment; use datafusion::logical_expr::{ @@ -757,7 +757,7 @@ pub fn lead( partition_by: Option>, order_by: Option>, ) -> PyResult { - let window_fn = datafusion::functions_window::expr_fn::lead(arg.expr, Some(shift_offset), default_value); + let window_fn = functions_window::expr_fn::lead(arg.expr, Some(shift_offset), default_value); add_builder_fns_to_window(window_fn, partition_by, None, order_by, None) } @@ -771,7 +771,7 @@ pub fn lag( partition_by: Option>, order_by: Option>, ) -> PyResult { - let window_fn = 
datafusion::functions_window::expr_fn::lag(arg.expr, Some(shift_offset), default_value); + let window_fn = functions_window::expr_fn::lag(arg.expr, Some(shift_offset), default_value); add_builder_fns_to_window(window_fn, partition_by, None, order_by, None) } @@ -782,7 +782,7 @@ pub fn row_number( partition_by: Option>, order_by: Option>, ) -> PyResult { - let window_fn = datafusion::functions_window::expr_fn::row_number(); + let window_fn = functions_window::expr_fn::row_number(); add_builder_fns_to_window(window_fn, partition_by, None, order_by, None) } @@ -793,7 +793,7 @@ pub fn rank( partition_by: Option>, order_by: Option>, ) -> PyResult { - let window_fn = datafusion::functions_window::expr_fn::rank(); + let window_fn = functions_window::expr_fn::rank(); add_builder_fns_to_window(window_fn, partition_by, None, order_by, None) } @@ -804,7 +804,7 @@ pub fn dense_rank( partition_by: Option>, order_by: Option>, ) -> PyResult { - let window_fn = datafusion::functions_window::expr_fn::dense_rank(); + let window_fn = functions_window::expr_fn::dense_rank(); add_builder_fns_to_window(window_fn, partition_by, None, order_by, None) } @@ -815,7 +815,7 @@ pub fn percent_rank( partition_by: Option>, order_by: Option>, ) -> PyResult { - let window_fn = datafusion::functions_window::expr_fn::percent_rank(); + let window_fn = functions_window::expr_fn::percent_rank(); add_builder_fns_to_window(window_fn, partition_by, None, order_by, None) } @@ -826,7 +826,7 @@ pub fn cume_dist( partition_by: Option>, order_by: Option>, ) -> PyResult { - let window_fn = datafusion::functions_window::expr_fn::cume_dist(); + let window_fn = functions_window::expr_fn::cume_dist(); add_builder_fns_to_window(window_fn, partition_by, None, order_by, None) } @@ -838,7 +838,7 @@ pub fn ntile( partition_by: Option>, order_by: Option>, ) -> PyResult { - let window_fn = datafusion::functions_window::expr_fn::ntile(arg.into()); + let window_fn = functions_window::expr_fn::ntile(arg.into()); 
add_builder_fns_to_window(window_fn, partition_by, None, order_by, None) } From 3676b1d0c8bcb861b9c4cf00dc15206dd4bec713 Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Tue, 29 Oct 2024 06:06:51 -0400 Subject: [PATCH 14/19] Only one column was being passed to udwf --- Cargo.lock | 172 +++++++++++++++++++++++++++------------------------- Cargo.toml | 3 +- src/udwf.rs | 12 +++- 3 files changed, 101 insertions(+), 86 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index ab2e2b2e..7aaaddef 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -84,9 +84,9 @@ dependencies = [ [[package]] name = "anyhow" -version = "1.0.89" +version = "1.0.91" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "86fdf8605db99b54d3cd748a44c6d04df638eb5dafb219b135d0149bd0db01f6" +checksum = "c042108f3ed77fd83760a5fd79b53be043192bb3b9dba91d8c574c0ada7850c8" [[package]] name = "apache-avro" @@ -351,9 +351,9 @@ dependencies = [ [[package]] name = "async-compression" -version = "0.4.13" +version = "0.4.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7e614738943d3f68c628ae3dbce7c3daffb196665f82f8c8ea6b65de73c79429" +checksum = "0cb8f1d480b0ea3783ab015936d2a55c87e219676f0c0b7dec61494043f21857" dependencies = [ "bzip2", "flate2", @@ -515,9 +515,9 @@ checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" [[package]] name = "bytes" -version = "1.7.2" +version = "1.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "428d9aa8fbc0670b7b8d6030a7fadd0f86151cae55e4dbbece15f3780a3dfaf3" +checksum = "9ac0150caa2ae65ca5bd83f25c7de183dea78d4d366469f148435e2acfbad0da" [[package]] name = "bzip2" @@ -542,9 +542,9 @@ dependencies = [ [[package]] name = "cc" -version = "1.1.28" +version = "1.1.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2e80e3b6a3ab07840e1cae9b0666a63970dc28e8ed5ffbcdacbfc760c281bfc1" +checksum = 
"c2e7962b54006dcfcc61cb72735f4d89bb97061dd6a7ed882ec6b8ee53714c6f" dependencies = [ "jobserver", "libc", @@ -557,6 +557,12 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" +[[package]] +name = "cfg_aliases" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" + [[package]] name = "chrono" version = "0.4.38" @@ -725,9 +731,9 @@ dependencies = [ [[package]] name = "dary_heap" -version = "0.3.6" +version = "0.3.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7762d17f1241643615821a8455a0b2c3e803784b058693d990b11f2dce25a0ca" +checksum = "04d2cd9c18b9f454ed67da600630b021a8a80bf33f8c95896ab33aaf1c26b728" [[package]] name = "dashmap" @@ -1150,6 +1156,7 @@ dependencies = [ "arrow", "async-trait", "datafusion", + "datafusion-functions-window-common", "datafusion-proto", "datafusion-substrait", "futures", @@ -1524,9 +1531,9 @@ checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" [[package]] name = "hyper" -version = "1.4.1" +version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "50dfd22e0e76d0f662d429a5f80fcaf3855009297eab6a0a9f8543834744ba05" +checksum = "bbbff0a806a4728c99295b254c8838933b5b082d75e3cb70c8dab21fdfbcfa9a" dependencies = [ "bytes", "futures-channel", @@ -1562,9 +1569,9 @@ dependencies = [ [[package]] name = "hyper-util" -version = "0.1.9" +version = "0.1.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "41296eb09f183ac68eec06e03cdbea2e759633d4067b2f6552fc2e009bcad08b" +checksum = "df2dcfbe0677734ab2f3ffa7fa7bfd4706bfdc1ef393f2ee30184aed67e631b4" dependencies = [ "bytes", "futures-channel", @@ -1678,9 +1685,9 @@ dependencies = [ [[package]] name = "js-sys" -version = "0.3.70" +version = "0.3.72" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "1868808506b929d7b0cfa8f75951347aa71bb21144b7791bae35d9bccfcfe37a" +checksum = "6a88f1bda2bd75b0452a14784937d796722fdebfe50df998aeb3f0b7603019a9" dependencies = [ "wasm-bindgen", ] @@ -1757,9 +1764,9 @@ dependencies = [ [[package]] name = "libc" -version = "0.2.159" +version = "0.2.161" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "561d97a539a36e26a9a5fad1ea11a3039a67714694aaa379433e580854bc3dc5" +checksum = "8e9489c2807c139ffd9c1794f4af0ebe86a828db53ecdc7fea2111d0fed085d1" [[package]] name = "libflate" @@ -1787,9 +1794,9 @@ dependencies = [ [[package]] name = "libm" -version = "0.2.8" +version = "0.2.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4ec2a862134d2a7d32d7983ddcdd1c4923530833c9f2ea1a44fc5fa473989058" +checksum = "8355be11b20d696c8f18f6cc018c4e372165b1fa8126cef092399c9951984ffa" [[package]] name = "libmimalloc-sys" @@ -2005,9 +2012,9 @@ dependencies = [ [[package]] name = "object_store" -version = "0.11.0" +version = "0.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "25a0c4b3a0e31f8b66f71ad8064521efa773910196e2cde791436f13409f3b45" +checksum = "6eb4c22c6154a1e759d7099f9ffad7cc5ef8245f9efbab4a41b92623079c82f3" dependencies = [ "async-trait", "base64 0.22.1", @@ -2222,9 +2229,9 @@ dependencies = [ [[package]] name = "pin-project-lite" -version = "0.2.14" +version = "0.2.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bda66fc9667c18cb2758a2ac84d1167245054bcf85d5d1aaa6923f45801bdd02" +checksum = "915a1e146535de9163f3987b8944ed8cf49a18bb0056bcebcdcece385cece4ff" [[package]] name = "pin-utils" @@ -2255,9 +2262,9 @@ dependencies = [ [[package]] name = "prettyplease" -version = "0.2.24" +version = "0.2.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "910d41a655dac3b764f1ade94821093d3610248694320cd072303a8eedcf221d" +checksum = 
"64d1ec885c64d0457d564db4ec299b2dae3f9c02808b8ad9c3a089c591b18033" dependencies = [ "proc-macro2", "syn", @@ -2265,9 +2272,9 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.86" +version = "1.0.89" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5e719e8df665df0d1c8fbfd238015744736151d4445ec0836b8e628aae103b77" +checksum = "f139b0662de085916d1fb67d2b4169d1addddda1919e696f3252b740b629986e" dependencies = [ "unicode-ident", ] @@ -2336,9 +2343,9 @@ dependencies = [ [[package]] name = "pyo3" -version = "0.22.4" +version = "0.22.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "00e89ce2565d6044ca31a3eb79a334c3a79a841120a98f64eea9f579564cb691" +checksum = "3d922163ba1f79c04bc49073ba7b32fd5a8d3b76a87c955921234b8e77333c51" dependencies = [ "cfg-if", "indoc", @@ -2354,9 +2361,9 @@ dependencies = [ [[package]] name = "pyo3-build-config" -version = "0.22.4" +version = "0.22.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d8afbaf3abd7325e08f35ffb8deb5892046fcb2608b703db6a583a5ba4cea01e" +checksum = "bc38c5feeb496c8321091edf3d63e9a6829eab4b863b4a6a65f26f3e9cc6b179" dependencies = [ "once_cell", "target-lexicon", @@ -2364,9 +2371,9 @@ dependencies = [ [[package]] name = "pyo3-ffi" -version = "0.22.4" +version = "0.22.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec15a5ba277339d04763f4c23d85987a5b08cbb494860be141e6a10a8eb88022" +checksum = "94845622d88ae274d2729fcefc850e63d7a3ddff5e3ce11bd88486db9f1d357d" dependencies = [ "libc", "pyo3-build-config", @@ -2374,9 +2381,9 @@ dependencies = [ [[package]] name = "pyo3-macros" -version = "0.22.4" +version = "0.22.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "15e0f01b5364bcfbb686a52fc4181d412b708a68ed20c330db9fc8d2c2bf5a43" +checksum = "e655aad15e09b94ffdb3ce3d217acf652e26bbc37697ef012f5e5e348c716e5e" dependencies = [ "proc-macro2", "pyo3-macros-backend", 
@@ -2386,9 +2393,9 @@ dependencies = [ [[package]] name = "pyo3-macros-backend" -version = "0.22.4" +version = "0.22.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a09b550200e1e5ed9176976d0060cbc2ea82dc8515da07885e7b8153a85caacb" +checksum = "ae1e3f09eecd94618f60a455a23def79f79eba4dc561a97324bf9ac8c6df30ce" dependencies = [ "heck 0.5.0", "proc-macro2", @@ -2450,10 +2457,11 @@ dependencies = [ [[package]] name = "quinn-udp" -version = "0.5.5" +version = "0.5.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4fe68c2e9e1a1234e218683dbdf9f9dfcb094113c5ac2b938dfcb9bab4c4140b" +checksum = "e346e016eacfff12233c243718197ca12f148c84e1e84268a896699b41c71780" dependencies = [ + "cfg_aliases", "libc", "once_cell", "socket2", @@ -2511,9 +2519,9 @@ dependencies = [ [[package]] name = "regex" -version = "1.11.0" +version = "1.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "38200e5ee88914975b69f657f0801b6f6dccafd44fd9326302a4aaeecfacb1d8" +checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191" dependencies = [ "aho-corasick", "memchr", @@ -2556,9 +2564,9 @@ dependencies = [ [[package]] name = "reqwest" -version = "0.12.8" +version = "0.12.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f713147fbe92361e52392c73b8c9e48c04c6625bce969ef54dc901e58e042a7b" +checksum = "a77c62af46e79de0a562e1a9849205ffcb7fc1238876e9bd743357570e04046f" dependencies = [ "base64 0.22.1", "bytes", @@ -2643,9 +2651,9 @@ dependencies = [ [[package]] name = "rustix" -version = "0.38.37" +version = "0.38.38" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8acb788b847c24f28525660c4d7758620a7210875711f79e7f663cc152726811" +checksum = "aa260229e6538e52293eeb577aabd09945a09d6d9cc0fc550ed7529056c2e32a" dependencies = [ "bitflags 2.6.0", "errno", @@ -2656,9 +2664,9 @@ dependencies = [ [[package]] name = "rustls" -version = "0.23.14" 
+version = "0.23.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "415d9944693cb90382053259f89fbb077ea730ad7273047ec63b19bc9b160ba8" +checksum = "eee87ff5d9b36712a58574e12e9f0ea80f915a5b0ac518d322b24a465617925e" dependencies = [ "once_cell", "ring", @@ -2692,9 +2700,9 @@ dependencies = [ [[package]] name = "rustls-pki-types" -version = "1.9.0" +version = "1.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0e696e35370c65c9c541198af4543ccd580cf17fc25d8e05c5a242b202488c55" +checksum = "16f1201b3c9a7ee8039bcadc17b7e605e2945b27eee7631788c1bd2b0643674b" [[package]] name = "rustls-webpki" @@ -2709,9 +2717,9 @@ dependencies = [ [[package]] name = "rustversion" -version = "1.0.17" +version = "1.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "955d28af4278de8121b7ebeb796b6a45735dc01436d898801014aced2773a3d6" +checksum = "0e819f2bc632f285be6d7cd36e25940d45b2391dd6d9b939e79de557f7014248" [[package]] name = "ryu" @@ -2807,18 +2815,18 @@ checksum = "a3f0bf26fd526d2a95683cd0f87bf103b8539e2ca1ef48ce002d67aad59aa0b4" [[package]] name = "serde" -version = "1.0.213" +version = "1.0.214" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3ea7893ff5e2466df8d720bb615088341b295f849602c6956047f8f80f0e9bc1" +checksum = "f55c3193aca71c12ad7890f1785d2b73e1b9f63a0bbc353c08ef26fe03fc56b5" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.213" +version = "1.0.214" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7e85ad2009c50b58e87caa8cd6dac16bdf511bbfb7af6c33df902396aa480fa5" +checksum = "de523f781f095e28fa605cdce0f8307e451cc0fd14e2eb4cd2e98a355b147766" dependencies = [ "proc-macro2", "quote", @@ -3036,9 +3044,9 @@ dependencies = [ [[package]] name = "substrait" -version = "0.45.3" +version = "0.45.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"28df2e4409ff5fb420f96573e76414a225b773f1364bfc9675b8daffe19ca571" +checksum = "a127ae9d8e443cea5c2122eb2ffe5fe489e802a1e746a09c5a5cb59d074c0aeb" dependencies = [ "heck 0.5.0", "pbjson", @@ -3068,9 +3076,9 @@ checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292" [[package]] name = "syn" -version = "2.0.82" +version = "2.0.85" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "83540f837a8afc019423a8edb95b52a8effe46957ee402287f4292fae35be021" +checksum = "5023162dfcd14ef8f32034d8bcd4cc5ddc61ef7a247c024a33e24e1f24d21b56" dependencies = [ "proc-macro2", "quote", @@ -3107,18 +3115,18 @@ dependencies = [ [[package]] name = "thiserror" -version = "1.0.64" +version = "1.0.65" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d50af8abc119fb8bb6dbabcfa89656f46f84aa0ac7688088608076ad2b459a84" +checksum = "5d11abd9594d9b38965ef50805c5e469ca9cc6f197f883f717e0269a3057b3d5" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "1.0.64" +version = "1.0.65" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08904e7672f5eb876eaaf87e0ce17857500934f4981c4a0ab2b4aa98baac7fc3" +checksum = "ae71770322cbd277e69d762a16c444af02aa0575ac0d174f0b9562d3b37f8602" dependencies = [ "proc-macro2", "quote", @@ -3162,9 +3170,9 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" [[package]] name = "tokio" -version = "1.40.0" +version = "1.41.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2b070231665d27ad9ec9b8df639893f46727666c6767db40317fbe920a5d998" +checksum = "145f3413504347a2be84393cc8a7d2fb4d863b375909ea59f2158261aa258bbb" dependencies = [ "backtrace", "bytes", @@ -3442,9 +3450,9 @@ checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" [[package]] name = "wasm-bindgen" -version = "0.2.93" +version = "0.2.95" source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "a82edfc16a6c469f5f44dc7b571814045d60404b55a0ee849f9bcfa2e63dd9b5" +checksum = "128d1e363af62632b8eb57219c8fd7877144af57558fb2ef0368d0087bddeb2e" dependencies = [ "cfg-if", "once_cell", @@ -3453,9 +3461,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-backend" -version = "0.2.93" +version = "0.2.95" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9de396da306523044d3302746f1208fa71d7532227f15e347e2d93e4145dd77b" +checksum = "cb6dd4d3ca0ddffd1dd1c9c04f94b868c37ff5fac97c30b97cff2d74fce3a358" dependencies = [ "bumpalo", "log", @@ -3468,9 +3476,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-futures" -version = "0.4.43" +version = "0.4.45" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "61e9300f63a621e96ed275155c108eb6f843b6a26d053f122ab69724559dc8ed" +checksum = "cc7ec4f8827a71586374db3e87abdb5a2bb3a15afed140221307c3ec06b1f63b" dependencies = [ "cfg-if", "js-sys", @@ -3480,9 +3488,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro" -version = "0.2.93" +version = "0.2.95" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "585c4c91a46b072c92e908d99cb1dcdf95c5218eeb6f3bf1efa991ee7a68cccf" +checksum = "e79384be7f8f5a9dd5d7167216f022090cf1f9ec128e6e6a482a2cb5c5422c56" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -3490,9 +3498,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.93" +version = "0.2.95" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "afc340c74d9005395cf9dd098506f7f44e38f2b4a21c6aaacf9a105ea5e1e836" +checksum = "26c6ab57572f7a24a4985830b120de1594465e5d500f24afe89e16b4e833ef68" dependencies = [ "proc-macro2", "quote", @@ -3503,15 +3511,15 @@ dependencies = [ [[package]] name = "wasm-bindgen-shared" -version = "0.2.93" +version = "0.2.95" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"c62a0a307cb4a311d3a07867860911ca130c3494e8c2719593806c08bc5d0484" +checksum = "65fc09f10666a9f147042251e0dda9c18f166ff7de300607007e96bdebc1068d" [[package]] name = "wasm-streams" -version = "0.4.1" +version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4e072d4e72f700fb3443d8fe94a39315df013eef1104903cdb0a2abd322bbecd" +checksum = "15053d8d85c7eccdbefef60f06769760a563c7f0a9d6902a13d35c7800b0ad65" dependencies = [ "futures-util", "js-sys", @@ -3522,9 +3530,9 @@ dependencies = [ [[package]] name = "web-sys" -version = "0.3.70" +version = "0.3.72" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26fdeaafd9bd129f65e7c031593c24d62186301e0c72c8978fa1678be7d532c0" +checksum = "f6488b90108c040df0fe62fa815cbdee25124641df01814dd7282749234c6112" dependencies = [ "js-sys", "wasm-bindgen", diff --git a/Cargo.toml b/Cargo.toml index 6766615c..e0f2461e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -40,6 +40,7 @@ arrow = { version = "53", features = ["pyarrow"] } datafusion = { version = "42.0.0", features = ["pyarrow", "avro", "unicode_expressions"] } datafusion-substrait = { version = "42.0.0", optional = true } datafusion-proto = { version = "42.0.0" } +datafusion-functions-window-common = { version = "42.0.0" } prost = "0.13" # keep in line with `datafusion-substrait` uuid = { version = "1.11", features = ["v4"] } mimalloc = { version = "0.1", optional = true, default-features = false, features = ["local_dynamic_tls"] } @@ -52,7 +53,7 @@ url = "2" datafusion = { git = "https://github.com/apache/datafusion.git", branch = "main" } datafusion-substrait = { git = "https://github.com/apache/datafusion.git", branch = "main" } datafusion-proto = { git = "https://github.com/apache/datafusion.git", branch = "main" } - +datafusion-functions-window-common = { git = "https://github.com/apache/datafusion.git", branch = "main" } [build-dependencies] prost-types = "0.13" # keep in line with `datafusion-substrait` diff --git 
a/src/udwf.rs b/src/udwf.rs index ce692de3..3f5ad0b1 100644 --- a/src/udwf.rs +++ b/src/udwf.rs @@ -22,10 +22,14 @@ use std::sync::Arc; use arrow::array::{make_array, Array, ArrayData, ArrayRef}; use datafusion::logical_expr::function::{PartitionEvaluatorArgs, WindowUDFFieldArgs}; use datafusion::logical_expr::window_state::WindowAggState; +use datafusion::physical_plan::PhysicalExpr; use datafusion::scalar::ScalarValue; +use datafusion_functions_window_common::expr::ExpressionArgs; use pyo3::exceptions::PyValueError; use pyo3::prelude::*; +use crate::expr::PyExpr; +use crate::utils::parse_volatility; use datafusion::arrow::datatypes::DataType; use datafusion::arrow::pyarrow::{FromPyArrow, PyArrowType, ToPyArrow}; use datafusion::error::{DataFusionError, Result}; @@ -34,9 +38,6 @@ use datafusion::logical_expr::{ }; use pyo3::types::{PyList, PyTuple}; -use crate::expr::PyExpr; -use crate::utils::parse_volatility; - #[derive(Debug)] struct RustPartitionEvaluator { evaluator: PyObject, @@ -91,6 +92,7 @@ impl PartitionEvaluator for RustPartitionEvaluator { } fn evaluate_all(&mut self, values: &[ArrayRef], num_rows: usize) -> Result { + println!("evaluate all called with number of values {}", values.len()); Python::with_gil(|py| { let py_values = PyList::new_bound( py, @@ -317,4 +319,8 @@ impl WindowUDFImpl for MultiColumnWindowUDF { let _ = _partition_evaluator_args; (self.partition_evaluator_factory)() } + + fn expressions(&self, expr_args: ExpressionArgs) -> Vec> { + expr_args.input_exprs().into() + } } From 9661ccf4aebb4a0f93c8efeae96c971ff8f8db03 Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Fri, 8 Nov 2024 19:08:32 -0500 Subject: [PATCH 15/19] Update to DF 43.0.0 --- Cargo.lock | 110 +++++++++++++++++++++++---------------- Cargo.toml | 14 ++--- python/tests/test_sql.py | 7 +++ src/sql/logical.rs | 1 + 4 files changed, 78 insertions(+), 54 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 7aaaddef..497c5b85 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -751,8 
+751,9 @@ dependencies = [ [[package]] name = "datafusion" -version = "42.1.0" -source = "git+https://github.com/apache/datafusion.git?branch=main#1fd6116dd9e1898540b4fbdbba735c4ebacc4227" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cbba0799cf6913b456ed07a94f0f3b6e12c62a5d88b10809e2284a0f2b915c05" dependencies = [ "ahash", "apache-avro", @@ -809,8 +810,9 @@ dependencies = [ [[package]] name = "datafusion-catalog" -version = "42.1.0" -source = "git+https://github.com/apache/datafusion.git?branch=main#1fd6116dd9e1898540b4fbdbba735c4ebacc4227" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7493c5c2d40eec435b13d92e5703554f4efc7059451fcb8d3a79580ff0e45560" dependencies = [ "arrow-schema", "async-trait", @@ -823,8 +825,9 @@ dependencies = [ [[package]] name = "datafusion-common" -version = "42.1.0" -source = "git+https://github.com/apache/datafusion.git?branch=main#1fd6116dd9e1898540b4fbdbba735c4ebacc4227" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24953049ebbd6f8964f91f60aa3514e121b5e81e068e33b60e77815ab369b25c" dependencies = [ "ahash", "apache-avro", @@ -849,8 +852,9 @@ dependencies = [ [[package]] name = "datafusion-common-runtime" -version = "42.1.0" -source = "git+https://github.com/apache/datafusion.git?branch=main#1fd6116dd9e1898540b4fbdbba735c4ebacc4227" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f06df4ef76872e11c924d3c814fd2a8dd09905ed2e2195f71c857d78abd19685" dependencies = [ "log", "tokio", @@ -858,8 +862,9 @@ dependencies = [ [[package]] name = "datafusion-execution" -version = "42.1.0" -source = "git+https://github.com/apache/datafusion.git?branch=main#1fd6116dd9e1898540b4fbdbba735c4ebacc4227" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"6bbdcb628d690f3ce5fea7de81642b514486d58ff9779a51f180a69a4eadb361" dependencies = [ "arrow", "chrono", @@ -878,8 +883,9 @@ dependencies = [ [[package]] name = "datafusion-expr" -version = "42.1.0" -source = "git+https://github.com/apache/datafusion.git?branch=main#1fd6116dd9e1898540b4fbdbba735c4ebacc4227" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8036495980e3131f706b7d33ab00b4492d73dc714e3cb74d11b50f9602a73246" dependencies = [ "ahash", "arrow", @@ -901,8 +907,9 @@ dependencies = [ [[package]] name = "datafusion-expr-common" -version = "42.1.0" -source = "git+https://github.com/apache/datafusion.git?branch=main#1fd6116dd9e1898540b4fbdbba735c4ebacc4227" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4da0f3cb4669f9523b403d6b5a0ec85023e0ab3bf0183afd1517475b3e64fdd2" dependencies = [ "arrow", "datafusion-common", @@ -912,8 +919,9 @@ dependencies = [ [[package]] name = "datafusion-functions" -version = "42.1.0" -source = "git+https://github.com/apache/datafusion.git?branch=main#1fd6116dd9e1898540b4fbdbba735c4ebacc4227" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f52c4012648b34853e40a2c6bcaa8772f837831019b68aca384fb38436dba162" dependencies = [ "arrow", "arrow-buffer", @@ -938,8 +946,9 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate" -version = "42.1.0" -source = "git+https://github.com/apache/datafusion.git?branch=main#1fd6116dd9e1898540b4fbdbba735c4ebacc4227" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5b8bb624597ba28ed7446df4a9bd7c7a7bde7c578b6b527da3f47371d5f6741" dependencies = [ "ahash", "arrow", @@ -958,8 +967,9 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate-common" -version = "42.1.0" -source = "git+https://github.com/apache/datafusion.git?branch=main#1fd6116dd9e1898540b4fbdbba735c4ebacc4227" +version 
= "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6fb06208fc470bc8cf1ce2d9a1159d42db591f2c7264a8c1776b53ad8f675143" dependencies = [ "ahash", "arrow", @@ -971,8 +981,9 @@ dependencies = [ [[package]] name = "datafusion-functions-nested" -version = "42.1.0" -source = "git+https://github.com/apache/datafusion.git?branch=main#1fd6116dd9e1898540b4fbdbba735c4ebacc4227" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fca25bbb87323716d05e54114666e942172ccca23c5a507e9c7851db6e965317" dependencies = [ "arrow", "arrow-array", @@ -993,8 +1004,9 @@ dependencies = [ [[package]] name = "datafusion-functions-window" -version = "42.1.0" -source = "git+https://github.com/apache/datafusion.git?branch=main#1fd6116dd9e1898540b4fbdbba735c4ebacc4227" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5ae23356c634e54c59f7c51acb7a5b9f6240ffb2cf997049a1a24a8a88598dbe" dependencies = [ "datafusion-common", "datafusion-expr", @@ -1007,8 +1019,9 @@ dependencies = [ [[package]] name = "datafusion-functions-window-common" -version = "42.1.0" -source = "git+https://github.com/apache/datafusion.git?branch=main#1fd6116dd9e1898540b4fbdbba735c4ebacc4227" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d4b3d6ff7794acea026de36007077a06b18b89e4f9c3fea7f2215f9f7dd9059b" dependencies = [ "datafusion-common", "datafusion-physical-expr-common", @@ -1016,8 +1029,9 @@ dependencies = [ [[package]] name = "datafusion-optimizer" -version = "42.1.0" -source = "git+https://github.com/apache/datafusion.git?branch=main#1fd6116dd9e1898540b4fbdbba735c4ebacc4227" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bec6241eb80c595fa0e1a8a6b69686b5cf3bd5fdacb8319582a0943b0bd788aa" dependencies = [ "arrow", "async-trait", @@ -1035,8 +1049,9 @@ dependencies = [ [[package]] name = 
"datafusion-physical-expr" -version = "42.1.0" -source = "git+https://github.com/apache/datafusion.git?branch=main#1fd6116dd9e1898540b4fbdbba735c4ebacc4227" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3370357b8fc75ec38577700644e5d1b0bc78f38babab99c0b8bd26bafb3e4335" dependencies = [ "ahash", "arrow", @@ -1062,8 +1077,9 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-common" -version = "42.1.0" -source = "git+https://github.com/apache/datafusion.git?branch=main#1fd6116dd9e1898540b4fbdbba735c4ebacc4227" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8b7734d94bf2fa6f6e570935b0ddddd8421179ce200065be97874e13d46a47b" dependencies = [ "ahash", "arrow", @@ -1075,8 +1091,9 @@ dependencies = [ [[package]] name = "datafusion-physical-optimizer" -version = "42.1.0" -source = "git+https://github.com/apache/datafusion.git?branch=main#1fd6116dd9e1898540b4fbdbba735c4ebacc4227" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7eee8c479522df21d7b395640dff88c5ed05361852dce6544d7c98e9dbcebffe" dependencies = [ "arrow", "arrow-schema", @@ -1090,8 +1107,9 @@ dependencies = [ [[package]] name = "datafusion-physical-plan" -version = "42.1.0" -source = "git+https://github.com/apache/datafusion.git?branch=main#1fd6116dd9e1898540b4fbdbba735c4ebacc4227" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "17e1fc2e2c239d14e8556f2622b19a726bf6bc6962cc00c71fc52626274bee24" dependencies = [ "ahash", "arrow", @@ -1124,8 +1142,9 @@ dependencies = [ [[package]] name = "datafusion-proto" -version = "42.1.0" -source = "git+https://github.com/apache/datafusion.git?branch=main#1fd6116dd9e1898540b4fbdbba735c4ebacc4227" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f730f7fc5a20134d4e5ecdf7bbf392002ac58163d58423ea28a702dc077b06e1" 
dependencies = [ "arrow", "chrono", @@ -1139,8 +1158,9 @@ dependencies = [ [[package]] name = "datafusion-proto-common" -version = "42.1.0" -source = "git+https://github.com/apache/datafusion.git?branch=main#1fd6116dd9e1898540b4fbdbba735c4ebacc4227" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "12c225fe49e4f943e35446b263613ada7a9e9f8d647544e6b07037b9803567df" dependencies = [ "arrow", "chrono", @@ -1173,8 +1193,9 @@ dependencies = [ [[package]] name = "datafusion-sql" -version = "42.1.0" -source = "git+https://github.com/apache/datafusion.git?branch=main#1fd6116dd9e1898540b4fbdbba735c4ebacc4227" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "63e3a4ed41dbee20a5d947a59ca035c225d67dc9cbe869c10f66dcdf25e7ce51" dependencies = [ "arrow", "arrow-array", @@ -1190,8 +1211,9 @@ dependencies = [ [[package]] name = "datafusion-substrait" -version = "42.1.0" -source = "git+https://github.com/apache/datafusion.git?branch=main#1fd6116dd9e1898540b4fbdbba735c4ebacc4227" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b9c768d2b4c4485c43afbaeeb86dd1f2ac3fb34a9e6e8c8b06180d2a223d5ba" dependencies = [ "arrow-buffer", "async-recursion", diff --git a/Cargo.toml b/Cargo.toml index e0f2461e..11ce08c7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -37,10 +37,10 @@ substrait = ["dep:datafusion-substrait"] tokio = { version = "1.39", features = ["macros", "rt", "rt-multi-thread", "sync"] } pyo3 = { version = "0.22", features = ["extension-module", "abi3", "abi3-py38"] } arrow = { version = "53", features = ["pyarrow"] } -datafusion = { version = "42.0.0", features = ["pyarrow", "avro", "unicode_expressions"] } -datafusion-substrait = { version = "42.0.0", optional = true } -datafusion-proto = { version = "42.0.0" } -datafusion-functions-window-common = { version = "42.0.0" } +datafusion = { version = "43.0.0", features = ["pyarrow", "avro", 
"unicode_expressions"] } +datafusion-substrait = { version = "43.0.0", optional = true } +datafusion-proto = { version = "43.0.0" } +datafusion-functions-window-common = { version = "43.0.0" } prost = "0.13" # keep in line with `datafusion-substrait` uuid = { version = "1.11", features = ["v4"] } mimalloc = { version = "0.1", optional = true, default-features = false, features = ["local_dynamic_tls"] } @@ -49,12 +49,6 @@ futures = "0.3" object_store = { version = "0.11.0", features = ["aws", "gcp", "azure", "http"] } url = "2" -[patch.crates-io] -datafusion = { git = "https://github.com/apache/datafusion.git", branch = "main" } -datafusion-substrait = { git = "https://github.com/apache/datafusion.git", branch = "main" } -datafusion-proto = { git = "https://github.com/apache/datafusion.git", branch = "main" } -datafusion-functions-window-common = { git = "https://github.com/apache/datafusion.git", branch = "main" } - [build-dependencies] prost-types = "0.13" # keep in line with `datafusion-substrait` pyo3-build-config = "0.22" diff --git a/python/tests/test_sql.py b/python/tests/test_sql.py index 39e5ffe6..a2521dd0 100644 --- a/python/tests/test_sql.py +++ b/python/tests/test_sql.py @@ -468,6 +468,13 @@ def test_simple_select(ctx, tmp_path, arr): batches = ctx.sql("SELECT a AS tt FROM t").collect() result = batches[0].column(0) + # In DF 43.0.0 we now default to having BinaryView and StringView + # so the array that is saved to the parquet is slightly different + # than the array read. Convert to values for comparison. 
+ if isinstance(result, pa.BinaryViewArray) or isinstance(result, pa.StringViewArray): + arr = arr.tolist() + result = result.tolist() + np.testing.assert_equal(result, arr) diff --git a/src/sql/logical.rs b/src/sql/logical.rs index 8aeec326..40f0a6a6 100644 --- a/src/sql/logical.rs +++ b/src/sql/logical.rs @@ -90,6 +90,7 @@ impl PyLogicalPlan { | LogicalPlan::Ddl(_) | LogicalPlan::Copy(_) | LogicalPlan::DescribeTable(_) + | LogicalPlan::Execute(_) | LogicalPlan::RecursiveQuery(_) => Err(py_unsupported_variant_err(format!( "Conversion of variant not implemented: {:?}", self.plan From 380a2ac64a1db4c0e97d8e443ab80932be634bbe Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Sat, 9 Nov 2024 10:10:36 -0500 Subject: [PATCH 16/19] Update tests to look for string_view type --- examples/tpch/_tests.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/tpch/_tests.py b/examples/tpch/_tests.py index 903b5354..f8a5f917 100644 --- a/examples/tpch/_tests.py +++ b/examples/tpch/_tests.py @@ -25,7 +25,7 @@ def df_selection(col_name, col_type): if col_type == pa.float64() or isinstance(col_type, pa.Decimal128Type): return F.round(col(col_name), lit(2)).alias(col_name) - elif col_type == pa.string(): + elif col_type == pa.string() or col_type == pa.string_view(): return F.trim(col(col_name)).alias(col_name) else: return col(col_name) @@ -43,7 +43,7 @@ def load_schema(col_name, col_type): def expected_selection(col_name, col_type): if col_type == pa.int64() or col_type == pa.int32(): return F.trim(col(col_name)).cast(col_type).alias(col_name) - elif col_type == pa.string(): + elif col_type == pa.string() or col_type == pa.string_view(): return F.trim(col(col_name)).alias(col_name) else: return col(col_name) From 73cfddf54785843e495dd9d8a784530f22406acd Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Sat, 9 Nov 2024 10:10:53 -0500 Subject: [PATCH 17/19] String view is now the default type for strings --- python/datafusion/expr.py | 4 +++- 1 file changed, 3 
insertions(+), 1 deletion(-) diff --git a/python/datafusion/expr.py b/python/datafusion/expr.py index 5c14362a..df19dfc6 100644 --- a/python/datafusion/expr.py +++ b/python/datafusion/expr.py @@ -374,6 +374,8 @@ def literal(value: Any) -> Expr: ``value`` must be a valid PyArrow scalar value or easily castable to one. """ + if isinstance(value, str): + value = pa.scalar(value, type=pa.string_view()) if not isinstance(value, pa.Scalar): value = pa.scalar(value) return Expr(expr_internal.Expr.literal(value)) @@ -419,7 +421,7 @@ def fill_null(self, value: Any | Expr | None = None) -> Expr: _to_pyarrow_types = { float: pa.float64(), int: pa.int64(), - str: pa.string(), + str: pa.string_view(), bool: pa.bool_(), } From fcb5f968e3f11cbcdf77d4c86e5d437de7427f11 Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Sat, 9 Nov 2024 13:41:13 -0500 Subject: [PATCH 18/19] Making a variety of adjustments in wrappers and unit tests to account for the switch from string to string_view as default --- python/datafusion/expr.py | 2 +- python/datafusion/functions.py | 13 +++++-- python/tests/test_expr.py | 8 +++- python/tests/test_functions.py | 67 ++++++++++++++++++++++++---------- 4 files changed, 63 insertions(+), 27 deletions(-) diff --git a/python/datafusion/expr.py b/python/datafusion/expr.py index df19dfc6..b1072438 100644 --- a/python/datafusion/expr.py +++ b/python/datafusion/expr.py @@ -421,7 +421,7 @@ def fill_null(self, value: Any | Expr | None = None) -> Expr: _to_pyarrow_types = { float: pa.float64(), int: pa.int64(), - str: pa.string_view(), + str: pa.string(), bool: pa.bool_(), } diff --git a/python/datafusion/functions.py b/python/datafusion/functions.py index 570a6ce5..9552876f 100644 --- a/python/datafusion/functions.py +++ b/python/datafusion/functions.py @@ -295,7 +295,7 @@ def decode(input: Expr, encoding: Expr) -> Expr: def array_to_string(expr: Expr, delimiter: Expr) -> Expr: """Converts each element to its text representation.""" - return 
Expr(f.array_to_string(expr.expr, delimiter.expr)) + return Expr(f.array_to_string(expr.expr, delimiter.expr.cast(pa.string()))) def array_join(expr: Expr, delimiter: Expr) -> Expr: @@ -924,7 +924,7 @@ def to_timestamp(arg: Expr, *formatters: Expr) -> Expr: return f.to_timestamp(arg.expr) formatters = [f.expr for f in formatters] - return Expr(f.to_timestamp(arg.expr, *formatters)) + return Expr(f.to_timestamp(arg.expr.cast(pa.string()), *formatters)) def to_timestamp_millis(arg: Expr, *formatters: Expr) -> Expr: @@ -1065,7 +1065,10 @@ def struct(*args: Expr) -> Expr: def named_struct(name_pairs: list[tuple[str, Expr]]) -> Expr: """Returns a struct with the given names and arguments pairs.""" - name_pair_exprs = [[Expr.literal(pair[0]), pair[1]] for pair in name_pairs] + name_pair_exprs = [ + [Expr.literal(pa.scalar(pair[0], type=pa.string())), pair[1]] + for pair in name_pairs + ] # flatten name_pairs = [x.expr for xs in name_pair_exprs for x in xs] @@ -1422,7 +1425,9 @@ def array_sort(array: Expr, descending: bool = False, null_first: bool = False) nulls_first = "NULLS FIRST" if null_first else "NULLS LAST" return Expr( f.array_sort( - array.expr, Expr.literal(desc).expr, Expr.literal(nulls_first).expr + array.expr, + Expr.literal(pa.scalar(desc, type=pa.string())).expr, + Expr.literal(pa.scalar(nulls_first, type=pa.string())).expr, ) ) diff --git a/python/tests/test_expr.py b/python/tests/test_expr.py index d81e04c8..77f88aa4 100644 --- a/python/tests/test_expr.py +++ b/python/tests/test_expr.py @@ -130,7 +130,10 @@ def test_relational_expr(test_ctx): ctx = SessionContext() batch = pa.RecordBatch.from_arrays( - [pa.array([1, 2, 3]), pa.array(["alpha", "beta", "gamma"])], + [ + pa.array([1, 2, 3]), + pa.array(["alpha", "beta", "gamma"], type=pa.string_view()), + ], names=["a", "b"], ) df = ctx.create_dataframe([[batch]], name="batch_array") @@ -145,7 +148,8 @@ def test_relational_expr(test_ctx): assert df.filter(col("b") == "beta").count() == 1 assert 
df.filter(col("b") != "beta").count() == 2 - assert df.filter(col("a") == "beta").count() == 0 + with pytest.raises(Exception): + df.filter(col("a") == "beta").count() def test_expr_to_variant(): diff --git a/python/tests/test_functions.py b/python/tests/test_functions.py index e6fd41d8..1ba4cfd8 100644 --- a/python/tests/test_functions.py +++ b/python/tests/test_functions.py @@ -34,9 +34,9 @@ def df(): # create a RecordBatch and a new DataFrame from it batch = pa.RecordBatch.from_arrays( [ - pa.array(["Hello", "World", "!"]), + pa.array(["Hello", "World", "!"], type=pa.string_view()), pa.array([4, 5, 6]), - pa.array(["hello ", " world ", " !"]), + pa.array(["hello ", " world ", " !"], type=pa.string_view()), pa.array( [ datetime(2022, 12, 31), @@ -88,8 +88,8 @@ def test_literal(df): assert len(result) == 1 result = result[0] assert result.column(0) == pa.array([1] * 3) - assert result.column(1) == pa.array(["1"] * 3) - assert result.column(2) == pa.array(["OK"] * 3) + assert result.column(1) == pa.array(["1"] * 3, type=pa.string_view()) + assert result.column(2) == pa.array(["OK"] * 3, type=pa.string_view()) assert result.column(3) == pa.array([3.14] * 3) assert result.column(4) == pa.array([True] * 3) assert result.column(5) == pa.array([b"hello world"] * 3) @@ -97,7 +97,9 @@ def test_literal(df): def test_lit_arith(df): """Test literals with arithmetic operations""" - df = df.select(literal(1) + column("b"), f.concat(column("a"), literal("!"))) + df = df.select( + literal(1) + column("b"), f.concat(column("a").cast(pa.string()), literal("!")) + ) result = df.collect() assert len(result) == 1 result = result[0] @@ -578,21 +580,33 @@ def test_array_function_obj_tests(stmt, py_expr): f.ascii(column("a")), pa.array([72, 87, 33], type=pa.int32()), ), # H = 72; W = 87; ! 
= 33 - (f.bit_length(column("a")), pa.array([40, 40, 8], type=pa.int32())), - (f.btrim(literal(" World ")), pa.array(["World", "World", "World"])), + ( + f.bit_length(column("a").cast(pa.string())), + pa.array([40, 40, 8], type=pa.int32()), + ), + ( + f.btrim(literal(" World ")), + pa.array(["World", "World", "World"], type=pa.string_view()), + ), (f.character_length(column("a")), pa.array([5, 5, 1], type=pa.int32())), (f.chr(literal(68)), pa.array(["D", "D", "D"])), ( f.concat_ws("-", column("a"), literal("test")), pa.array(["Hello-test", "World-test", "!-test"]), ), - (f.concat(column("a"), literal("?")), pa.array(["Hello?", "World?", "!?"])), + ( + f.concat(column("a").cast(pa.string()), literal("?")), + pa.array(["Hello?", "World?", "!?"]), + ), (f.initcap(column("c")), pa.array(["Hello ", " World ", " !"])), (f.left(column("a"), literal(3)), pa.array(["Hel", "Wor", "!"])), (f.length(column("c")), pa.array([6, 7, 2], type=pa.int32())), (f.lower(column("a")), pa.array(["hello", "world", "!"])), (f.lpad(column("a"), literal(7)), pa.array([" Hello", " World", " !"])), - (f.ltrim(column("c")), pa.array(["hello ", "world ", "!"])), + ( + f.ltrim(column("c")), + pa.array(["hello ", "world ", "!"], type=pa.string_view()), + ), ( f.md5(column("a")), pa.array( @@ -618,19 +632,25 @@ def test_array_function_obj_tests(stmt, py_expr): f.rpad(column("a"), literal(8)), pa.array(["Hello ", "World ", "! 
"]), ), - (f.rtrim(column("c")), pa.array(["hello", " world", " !"])), + ( + f.rtrim(column("c")), + pa.array(["hello", " world", " !"], type=pa.string_view()), + ), ( f.split_part(column("a"), literal("l"), literal(1)), pa.array(["He", "Wor", "!"]), ), (f.starts_with(column("a"), literal("Wor")), pa.array([False, True, False])), (f.strpos(column("a"), literal("o")), pa.array([5, 2, 0], type=pa.int32())), - (f.substr(column("a"), literal(3)), pa.array(["llo", "rld", ""])), + ( + f.substr(column("a"), literal(3)), + pa.array(["llo", "rld", ""], type=pa.string_view()), + ), ( f.translate(column("a"), literal("or"), literal("ld")), pa.array(["Helll", "Wldld", "!"]), ), - (f.trim(column("c")), pa.array(["hello", "world", "!"])), + (f.trim(column("c")), pa.array(["hello", "world", "!"], type=pa.string_view())), (f.upper(column("c")), pa.array(["HELLO ", " WORLD ", " !"])), (f.ends_with(column("a"), literal("llo")), pa.array([True, False, False])), ( @@ -772,9 +792,9 @@ def test_temporal_functions(df): f.date_trunc(literal("month"), column("d")), f.datetrunc(literal("day"), column("d")), f.date_bin( - literal("15 minutes"), + literal("15 minutes").cast(pa.string()), column("d"), - literal("2001-01-01 00:02:30"), + literal("2001-01-01 00:02:30").cast(pa.string()), ), f.from_unixtime(literal(1673383974)), f.to_timestamp(literal("2023-09-07 05:06:14.523952")), @@ -836,8 +856,8 @@ def test_case(df): result = df.collect() result = result[0] assert result.column(0) == pa.array([10, 8, 8]) - assert result.column(1) == pa.array(["Hola", "Mundo", "!!"]) - assert result.column(2) == pa.array(["Hola", "Mundo", None]) + assert result.column(1) == pa.array(["Hola", "Mundo", "!!"], type=pa.string_view()) + assert result.column(2) == pa.array(["Hola", "Mundo", None], type=pa.string_view()) def test_when_with_no_base(df): @@ -855,8 +875,10 @@ def test_when_with_no_base(df): result = df.collect() result = result[0] assert result.column(0) == pa.array([4, 5, 6]) - assert result.column(1) 
== pa.array(["too small", "just right", "too big"]) - assert result.column(2) == pa.array(["Hello", None, None]) + assert result.column(1) == pa.array( + ["too small", "just right", "too big"], type=pa.string_view() + ) + assert result.column(2) == pa.array(["Hello", None, None], type=pa.string_view()) def test_regr_funcs_sql(df): @@ -999,8 +1021,13 @@ def test_regr_funcs_df(func, expected): def test_binary_string_functions(df): df = df.select( - f.encode(column("a"), literal("base64")), - f.decode(f.encode(column("a"), literal("base64")), literal("base64")), + f.encode(column("a").cast(pa.string()), literal("base64").cast(pa.string())), + f.decode( + f.encode( + column("a").cast(pa.string()), literal("base64").cast(pa.string()) + ), + literal("base64").cast(pa.string()), + ), ) result = df.collect() assert len(result) == 1 From bca6fa6428362363789a919e0426f957feb9fb83 Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Sat, 9 Nov 2024 14:28:27 -0500 Subject: [PATCH 19/19] Resolve errors in doc building --- python/datafusion/functions.py | 2 +- python/datafusion/udf.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/python/datafusion/functions.py b/python/datafusion/functions.py index 9552876f..a1b6b20a 100644 --- a/python/datafusion/functions.py +++ b/python/datafusion/functions.py @@ -924,7 +924,7 @@ def to_timestamp(arg: Expr, *formatters: Expr) -> Expr: return f.to_timestamp(arg.expr) formatters = [f.expr for f in formatters] - return Expr(f.to_timestamp(arg.expr.cast(pa.string()), *formatters)) + return Expr(f.to_timestamp(arg.expr, *formatters)) def to_timestamp_millis(arg: Expr, *formatters: Expr) -> Expr: diff --git a/python/datafusion/udf.py b/python/datafusion/udf.py index 291ef2ba..d9d994b2 100644 --- a/python/datafusion/udf.py +++ b/python/datafusion/udf.py @@ -229,6 +229,7 @@ def udaf( which this UDAF is used. The following examples are all valid. .. code-block:: python + import pyarrow as pa import pyarrow.compute as pc