From 37a197cfa47a99fd57c38d548a67655f2def0788 Mon Sep 17 00:00:00 2001 From: Raphael Taylor-Davies Date: Wed, 1 Mar 2023 13:47:43 +0000 Subject: [PATCH 1/6] Update to NullBuffer in ArrayData --- Cargo.toml | 7 +++ datafusion-cli/Cargo.lock | 45 +++++++------------ datafusion-cli/Cargo.toml | 6 +++ .../physical-expr/src/expressions/in_list.rs | 8 ++-- .../physical-expr/src/regex_expressions.rs | 5 +-- datafusion/proto/Cargo.toml | 2 +- 6 files changed, 35 insertions(+), 38 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 96bbc377eeee..f216aaceb73d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -51,3 +51,10 @@ opt-level = 3 overflow-checks = false panic = 'unwind' rpath = false + +[patch.crates-io] +arrow = { git = "https://github.com/tustvold/arrow-rs.git", rev = "5f22cd4e6fa16729e946cbdfbacfa27692f35ab8" } +arrow-flight = { git = "https://github.com/tustvold/arrow-rs.git", rev = "5f22cd4e6fa16729e946cbdfbacfa27692f35ab8" } +arrow-schema = { git = "https://github.com/tustvold/arrow-rs.git", rev = "5f22cd4e6fa16729e946cbdfbacfa27692f35ab8" } +arrow-buffer = { git = "https://github.com/tustvold/arrow-rs.git", rev = "5f22cd4e6fa16729e946cbdfbacfa27692f35ab8" } +parquet = { git = "https://github.com/tustvold/arrow-rs.git", rev = "5f22cd4e6fa16729e946cbdfbacfa27692f35ab8" } diff --git a/datafusion-cli/Cargo.lock b/datafusion-cli/Cargo.lock index e83c77d7a15d..3077dbf2a4fc 100644 --- a/datafusion-cli/Cargo.lock +++ b/datafusion-cli/Cargo.lock @@ -69,8 +69,7 @@ checksum = "8da52d66c7071e2e3fa2a1e5c6d088fec47b593032b254f5e980de8ea54454d6" [[package]] name = "arrow" version = "34.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f410d3907b6b3647b9e7bca4551274b2e3d716aa940afb67b7287257401da921" +source = "git+https://github.com/tustvold/arrow-rs.git?rev=5f22cd4e6fa16729e946cbdfbacfa27692f35ab8#5f22cd4e6fa16729e946cbdfbacfa27692f35ab8" dependencies = [ "ahash", "arrow-arith", @@ -92,8 +91,7 @@ dependencies = [ [[package]] name = "arrow-arith" version = "34.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f87391cf46473c9bc53dab68cb8872c3a81d4dfd1703f1c8aa397dba9880a043" +source = "git+https://github.com/tustvold/arrow-rs.git?rev=5f22cd4e6fa16729e946cbdfbacfa27692f35ab8#5f22cd4e6fa16729e946cbdfbacfa27692f35ab8" dependencies = [ "arrow-array", "arrow-buffer", @@ -107,8 +105,7 @@ dependencies = [ [[package]] name = "arrow-array" version = "34.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d35d5475e65c57cffba06d0022e3006b677515f99b54af33a7cd54f6cdd4a5b5" +source = "git+https://github.com/tustvold/arrow-rs.git?rev=5f22cd4e6fa16729e946cbdfbacfa27692f35ab8#5f22cd4e6fa16729e946cbdfbacfa27692f35ab8" dependencies = [ "ahash", "arrow-buffer", @@ -123,8 +120,7 @@ dependencies = [ [[package]] name = "arrow-buffer" version = "34.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "68b4ec72eda7c0207727df96cf200f539749d736b21f3e782ece113e18c1a0a7" +source = "git+https://github.com/tustvold/arrow-rs.git?rev=5f22cd4e6fa16729e946cbdfbacfa27692f35ab8#5f22cd4e6fa16729e946cbdfbacfa27692f35ab8" dependencies = [ "half", "num", @@ -133,8 +129,7 @@ dependencies = [ [[package]] name = "arrow-cast" version = "34.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0a7285272c9897321dfdba59de29f5b05aeafd3cdedf104a941256d155f6d304" +source = "git+https://github.com/tustvold/arrow-rs.git?rev=5f22cd4e6fa16729e946cbdfbacfa27692f35ab8#5f22cd4e6fa16729e946cbdfbacfa27692f35ab8" dependencies = [ "arrow-array", "arrow-buffer", @@ -149,8 +144,7 @@ dependencies = [ [[package]] name = "arrow-csv" version = "34.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "981ee4e7f6a120da04e00d0b39182e1eeacccb59c8da74511de753c56b7fddf7" +source = "git+https://github.com/tustvold/arrow-rs.git?rev=5f22cd4e6fa16729e946cbdfbacfa27692f35ab8#5f22cd4e6fa16729e946cbdfbacfa27692f35ab8" dependencies = [ "arrow-array", "arrow-buffer", @@ -168,8 +162,7 @@ dependencies = [ [[package]] name = "arrow-data" version = "34.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "27cc673ee6989ea6e4b4e8c7d461f7e06026a096c8f0b1a7288885ff71ae1e56" +source = "git+https://github.com/tustvold/arrow-rs.git?rev=5f22cd4e6fa16729e946cbdfbacfa27692f35ab8#5f22cd4e6fa16729e946cbdfbacfa27692f35ab8" dependencies = [ "arrow-buffer", "arrow-schema", @@ -180,8 +173,7 @@ dependencies = [ [[package]] name = "arrow-ipc" version = "34.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e37b8b69d9e59116b6b538e8514e0ec63a30f08b617ce800d31cb44e3ef64c1a" +source = "git+https://github.com/tustvold/arrow-rs.git?rev=5f22cd4e6fa16729e946cbdfbacfa27692f35ab8#5f22cd4e6fa16729e946cbdfbacfa27692f35ab8" dependencies = [ "arrow-array", "arrow-buffer", @@ -194,8 +186,7 @@ dependencies = [ [[package]] name = "arrow-json" version = "34.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "80c3fa0bed7cfebf6d18e46b733f9cb8a1cb43ce8e6539055ca3e1e48a426266" +source = "git+https://github.com/tustvold/arrow-rs.git?rev=5f22cd4e6fa16729e946cbdfbacfa27692f35ab8#5f22cd4e6fa16729e946cbdfbacfa27692f35ab8" dependencies = [ "arrow-array", "arrow-buffer", @@ -213,8 +204,7 @@ dependencies = [ [[package]] name = "arrow-ord" version = "34.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d247dce7bed6a8d6a3c6debfa707a3a2f694383f0c692a39d736a593eae5ef94" +source = "git+https://github.com/tustvold/arrow-rs.git?rev=5f22cd4e6fa16729e946cbdfbacfa27692f35ab8#5f22cd4e6fa16729e946cbdfbacfa27692f35ab8" dependencies = [ "arrow-array", "arrow-buffer", @@ -227,8 +217,7 @@ dependencies = [ [[package]] name = "arrow-row" version = "34.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8d609c0181f963cea5c70fddf9a388595b5be441f3aa1d1cdbf728ca834bbd3a" +source = "git+https://github.com/tustvold/arrow-rs.git?rev=5f22cd4e6fa16729e946cbdfbacfa27692f35ab8#5f22cd4e6fa16729e946cbdfbacfa27692f35ab8" dependencies = [ "ahash", "arrow-array", @@ -242,14 +231,12 @@ dependencies = [ [[package]] name = "arrow-schema" version = "34.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "64951898473bfb8e22293e83a44f02874d2257514d49cd95f9aa4afcff183fbc" +source = "git+https://github.com/tustvold/arrow-rs.git?rev=5f22cd4e6fa16729e946cbdfbacfa27692f35ab8#5f22cd4e6fa16729e946cbdfbacfa27692f35ab8" [[package]] name = "arrow-select" version = "34.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2a513d89c2e1ac22b28380900036cf1f3992c6443efc5e079de631dcf83c6888" +source = "git+https://github.com/tustvold/arrow-rs.git?rev=5f22cd4e6fa16729e946cbdfbacfa27692f35ab8#5f22cd4e6fa16729e946cbdfbacfa27692f35ab8" dependencies = [ "arrow-array", "arrow-buffer", @@ -261,8 +248,7 @@ dependencies = [ [[package]] name = "arrow-string" version = "34.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5288979b2705dae1114c864d73150629add9153b9b8f1d7ee3963db94c372ba5" +source = "git+https://github.com/tustvold/arrow-rs.git?rev=5f22cd4e6fa16729e946cbdfbacfa27692f35ab8#5f22cd4e6fa16729e946cbdfbacfa27692f35ab8" dependencies = [ "arrow-array", "arrow-buffer", @@ -1759,8 +1745,7 @@ dependencies = [ [[package]] name = "parquet" version = "34.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7ac135ecf63ebb5f53dda0921b0b76d6048b3ef631a5f4760b9e8f863ff00cfa" +source = "git+https://github.com/tustvold/arrow-rs.git?rev=5f22cd4e6fa16729e946cbdfbacfa27692f35ab8#5f22cd4e6fa16729e946cbdfbacfa27692f35ab8" dependencies = [ "ahash", "arrow-array", diff --git a/datafusion-cli/Cargo.toml b/datafusion-cli/Cargo.toml index 53404dcfe0cf..90aeeeb89fe2 100644 --- a/datafusion-cli/Cargo.toml +++ b/datafusion-cli/Cargo.toml @@ -41,3 +41,9 @@ parking_lot = { version = "0.12" } rustyline = "10.0" tokio = { version = "1.24", features = ["macros", "rt", "rt-multi-thread", "sync", "parking_lot"] } url = "2.2" + +[patch.crates-io] +arrow = { git = "https://github.com/tustvold/arrow-rs.git", rev = "5f22cd4e6fa16729e946cbdfbacfa27692f35ab8" } +arrow-buffer = { git = "https://github.com/tustvold/arrow-rs.git", rev = "5f22cd4e6fa16729e946cbdfbacfa27692f35ab8" } +arrow-schema = { git = "https://github.com/tustvold/arrow-rs.git", rev = "5f22cd4e6fa16729e946cbdfbacfa27692f35ab8" } +parquet = { git = "https://github.com/tustvold/arrow-rs.git", rev = "5f22cd4e6fa16729e946cbdfbacfa27692f35ab8" } diff --git a/datafusion/physical-expr/src/expressions/in_list.rs b/datafusion/physical-expr/src/expressions/in_list.rs index 3a5a25ff66cf..e3672047900d 100644 --- a/datafusion/physical-expr/src/expressions/in_list.rs +++ b/datafusion/physical-expr/src/expressions/in_list.rs @@ -162,9 +162,11 @@ where } }; - match data.null_buffer() { - Some(buffer) => BitIndexIterator::new(buffer.as_ref(), data.offset(), data.len()) - .for_each(insert_value), + match data.nulls() { + Some(nulls) => { + BitIndexIterator::new(nulls.validity(), nulls.offset(), nulls.len()) + .for_each(insert_value) + } None => (0..data.len()).for_each(insert_value), } diff --git a/datafusion/physical-expr/src/regex_expressions.rs b/datafusion/physical-expr/src/regex_expressions.rs index ebc53e3225ce..e3edd02481d7 100644 --- a/datafusion/physical-expr/src/regex_expressions.rs +++ b/datafusion/physical-expr/src/regex_expressions.rs @@ -265,10 +265,7 @@ fn _regexp_replace_static_pattern_replace( let data = ArrayData::try_new( GenericStringArray::::DATA_TYPE, string_array.len(), - string_array - .data_ref() - .null_buffer() - .map(|b| b.bit_slice(string_array.offset(), string_array.len())), + string_array.data().nulls().map(|b| b.inner().sliced()), 0, vec![new_offsets.finish(), vals.finish()], vec![], diff --git a/datafusion/proto/Cargo.toml b/datafusion/proto/Cargo.toml index bcf0dac02d94..7dbcee9428fc 100644 --- a/datafusion/proto/Cargo.toml +++ b/datafusion/proto/Cargo.toml @@ -60,4 +60,4 @@ tokio = "1.18" [build-dependencies] # Pin these dependencies so that the generated output is deterministic pbjson-build = { version = "=0.5.1" } -prost-build = { version = "=0.11.7" } +prost-build = { version = "=0.11.8" } From 4356cd14709d3a61f869759fb32df9beb9602a43 Mon Sep 17 00:00:00 2001 From: Raphael Taylor-Davies Date: Wed, 1 Mar 2023 14:07:41 +0000 Subject: [PATCH 2/6] Fix test --- datafusion/common/src/scalar.rs | 4 ---- 1 file changed, 4 deletions(-) diff --git a/datafusion/common/src/scalar.rs b/datafusion/common/src/scalar.rs index 66c1f3f12552..1149f5ca3064 100644 --- a/datafusion/common/src/scalar.rs +++ b/datafusion/common/src/scalar.rs @@ -2979,10 +2979,6 @@ mod tests { ScalarValue::Decimal128(None, 10, 2), ScalarValue::try_from_array(&array, 3).unwrap() ); - assert_eq!( - ScalarValue::Decimal128(None, 10, 2), - ScalarValue::try_from_array(&array, 4).unwrap() - ); Ok(()) } From 58ad1e7ee6eba88e33d126adfdeb181e453bf654 Mon Sep 17 00:00:00 2001 From: Raphael Taylor-Davies Date: Fri, 10 Mar 2023 18:49:50 +0000 Subject: [PATCH 3/6] Update pin --- Cargo.toml | 18 +++++----- datafusion-cli/Cargo.lock | 62 +++++++++++++++++----------------- datafusion-cli/Cargo.toml | 10 +++--- datafusion-examples/Cargo.toml | 2 +- 4 files changed, 46 insertions(+), 46 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 9caae424ef26..374fa3ede87f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -46,10 +46,10 @@ repository = "https://github.com/apache/arrow-datafusion" rust-version = "1.64" [workspace.dependencies] -arrow = { version = "34.0.0", features = ["prettyprint"] } -arrow-buffer = "34.0.0" -arrow-schema = "34.0.0" -parquet = { version = "34.0.0", features = ["arrow", "async"] } +arrow = { version = "35.0.0", features = ["prettyprint"] } +arrow-buffer = "35.0.0" +arrow-schema = "35.0.0" +parquet = { version = "35.0.0", features = ["arrow", "async"] } [profile.release] codegen-units = 1 @@ -70,8 +70,8 @@ panic = 'unwind' rpath = false [patch.crates-io] -arrow = { git = "https://github.com/tustvold/arrow-rs.git", rev = "5f22cd4e6fa16729e946cbdfbacfa27692f35ab8" } -arrow-flight = { git = "https://github.com/tustvold/arrow-rs.git", rev = "5f22cd4e6fa16729e946cbdfbacfa27692f35ab8" } -arrow-schema = { git = "https://github.com/tustvold/arrow-rs.git", rev = "5f22cd4e6fa16729e946cbdfbacfa27692f35ab8" } -arrow-buffer = { git = "https://github.com/tustvold/arrow-rs.git", rev = "5f22cd4e6fa16729e946cbdfbacfa27692f35ab8" } -parquet = { git = "https://github.com/tustvold/arrow-rs.git", rev = "5f22cd4e6fa16729e946cbdfbacfa27692f35ab8" } +arrow = { git = "https://github.com/tustvold/arrow-rs.git", rev = "c96274a562625f091ca4c06fca21ac35ef330358" } +arrow-flight = { git = "https://github.com/tustvold/arrow-rs.git", rev = "c96274a562625f091ca4c06fca21ac35ef330358" } +arrow-schema = { git = "https://github.com/tustvold/arrow-rs.git", rev = "c96274a562625f091ca4c06fca21ac35ef330358" } +arrow-buffer = { git = "https://github.com/tustvold/arrow-rs.git", rev = "c96274a562625f091ca4c06fca21ac35ef330358" } +parquet = { git = "https://github.com/tustvold/arrow-rs.git", rev = "c96274a562625f091ca4c06fca21ac35ef330358" } diff --git a/datafusion-cli/Cargo.lock b/datafusion-cli/Cargo.lock index 7548c65d88e2..f173f6a66449 100644 --- a/datafusion-cli/Cargo.lock +++ b/datafusion-cli/Cargo.lock @@ -68,8 +68,8 @@ checksum = "8da52d66c7071e2e3fa2a1e5c6d088fec47b593032b254f5e980de8ea54454d6" [[package]] name = "arrow" -version = "34.0.0" -source = "git+https://github.com/tustvold/arrow-rs.git?rev=5f22cd4e6fa16729e946cbdfbacfa27692f35ab8#5f22cd4e6fa16729e946cbdfbacfa27692f35ab8" +version = "35.0.0" +source = "git+https://github.com/tustvold/arrow-rs.git?rev=c96274a562625f091ca4c06fca21ac35ef330358#c96274a562625f091ca4c06fca21ac35ef330358" dependencies = [ "ahash", "arrow-arith", @@ -85,13 +85,12 @@ dependencies = [ "arrow-schema", "arrow-select", "arrow-string", - "comfy-table", ] [[package]] name = "arrow-arith" -version = "34.0.0" -source = "git+https://github.com/tustvold/arrow-rs.git?rev=5f22cd4e6fa16729e946cbdfbacfa27692f35ab8#5f22cd4e6fa16729e946cbdfbacfa27692f35ab8" +version = "35.0.0" +source = "git+https://github.com/tustvold/arrow-rs.git?rev=c96274a562625f091ca4c06fca21ac35ef330358#c96274a562625f091ca4c06fca21ac35ef330358" dependencies = [ "arrow-array", "arrow-buffer", @@ -104,8 +103,8 @@ dependencies = [ [[package]] name = "arrow-array" -version = "34.0.0" -source = "git+https://github.com/tustvold/arrow-rs.git?rev=5f22cd4e6fa16729e946cbdfbacfa27692f35ab8#5f22cd4e6fa16729e946cbdfbacfa27692f35ab8" +version = "35.0.0" +source = "git+https://github.com/tustvold/arrow-rs.git?rev=c96274a562625f091ca4c06fca21ac35ef330358#c96274a562625f091ca4c06fca21ac35ef330358" dependencies = [ "ahash", "arrow-buffer", @@ -119,8 +118,8 @@ dependencies = [ [[package]] name = "arrow-buffer" -version = "34.0.0" -source = "git+https://github.com/tustvold/arrow-rs.git?rev=5f22cd4e6fa16729e946cbdfbacfa27692f35ab8#5f22cd4e6fa16729e946cbdfbacfa27692f35ab8" +version = "35.0.0" +source = "git+https://github.com/tustvold/arrow-rs.git?rev=c96274a562625f091ca4c06fca21ac35ef330358#c96274a562625f091ca4c06fca21ac35ef330358" dependencies = [ "half", "num", @@ -128,8 +127,8 @@ dependencies = [ [[package]] name = "arrow-cast" -version = "34.0.0" -source = "git+https://github.com/tustvold/arrow-rs.git?rev=5f22cd4e6fa16729e946cbdfbacfa27692f35ab8#5f22cd4e6fa16729e946cbdfbacfa27692f35ab8" +version = "35.0.0" +source = "git+https://github.com/tustvold/arrow-rs.git?rev=c96274a562625f091ca4c06fca21ac35ef330358#c96274a562625f091ca4c06fca21ac35ef330358" dependencies = [ "arrow-array", "arrow-buffer", @@ -137,14 +136,15 @@ dependencies = [ "arrow-schema", "arrow-select", "chrono", + "comfy-table", "lexical-core", "num", ] [[package]] name = "arrow-csv" -version = "34.0.0" -source = "git+https://github.com/tustvold/arrow-rs.git?rev=5f22cd4e6fa16729e946cbdfbacfa27692f35ab8#5f22cd4e6fa16729e946cbdfbacfa27692f35ab8" +version = "35.0.0" +source = "git+https://github.com/tustvold/arrow-rs.git?rev=c96274a562625f091ca4c06fca21ac35ef330358#c96274a562625f091ca4c06fca21ac35ef330358" dependencies = [ "arrow-array", "arrow-buffer", @@ -161,8 +161,8 @@ dependencies = [ [[package]] name = "arrow-data" -version = "34.0.0" -source = "git+https://github.com/tustvold/arrow-rs.git?rev=5f22cd4e6fa16729e946cbdfbacfa27692f35ab8#5f22cd4e6fa16729e946cbdfbacfa27692f35ab8" +version = "35.0.0" +source = "git+https://github.com/tustvold/arrow-rs.git?rev=c96274a562625f091ca4c06fca21ac35ef330358#c96274a562625f091ca4c06fca21ac35ef330358" dependencies = [ "arrow-buffer", "arrow-schema", @@ -172,8 +172,8 @@ dependencies = [ [[package]] name = "arrow-ipc" -version = "34.0.0" -source = "git+https://github.com/tustvold/arrow-rs.git?rev=5f22cd4e6fa16729e946cbdfbacfa27692f35ab8#5f22cd4e6fa16729e946cbdfbacfa27692f35ab8" +version = "35.0.0" +source = "git+https://github.com/tustvold/arrow-rs.git?rev=c96274a562625f091ca4c06fca21ac35ef330358#c96274a562625f091ca4c06fca21ac35ef330358" dependencies = [ "arrow-array", "arrow-buffer", @@ -185,8 +185,8 @@ dependencies = [ [[package]] name = "arrow-json" -version = "34.0.0" -source = "git+https://github.com/tustvold/arrow-rs.git?rev=5f22cd4e6fa16729e946cbdfbacfa27692f35ab8#5f22cd4e6fa16729e946cbdfbacfa27692f35ab8" +version = "35.0.0" +source = "git+https://github.com/tustvold/arrow-rs.git?rev=c96274a562625f091ca4c06fca21ac35ef330358#c96274a562625f091ca4c06fca21ac35ef330358" dependencies = [ "arrow-array", "arrow-buffer", @@ -203,8 +203,8 @@ dependencies = [ [[package]] name = "arrow-ord" -version = "34.0.0" -source = "git+https://github.com/tustvold/arrow-rs.git?rev=5f22cd4e6fa16729e946cbdfbacfa27692f35ab8#5f22cd4e6fa16729e946cbdfbacfa27692f35ab8" +version = "35.0.0" +source = "git+https://github.com/tustvold/arrow-rs.git?rev=c96274a562625f091ca4c06fca21ac35ef330358#c96274a562625f091ca4c06fca21ac35ef330358" dependencies = [ "arrow-array", "arrow-buffer", @@ -216,8 +216,8 @@ dependencies = [ [[package]] name = "arrow-row" -version = "34.0.0" -source = "git+https://github.com/tustvold/arrow-rs.git?rev=5f22cd4e6fa16729e946cbdfbacfa27692f35ab8#5f22cd4e6fa16729e946cbdfbacfa27692f35ab8" +version = "35.0.0" +source = "git+https://github.com/tustvold/arrow-rs.git?rev=c96274a562625f091ca4c06fca21ac35ef330358#c96274a562625f091ca4c06fca21ac35ef330358" dependencies = [ "ahash", "arrow-array", @@ -230,13 +230,13 @@ dependencies = [ [[package]] name = "arrow-schema" -version = "34.0.0" -source = "git+https://github.com/tustvold/arrow-rs.git?rev=5f22cd4e6fa16729e946cbdfbacfa27692f35ab8#5f22cd4e6fa16729e946cbdfbacfa27692f35ab8" +version = "35.0.0" +source = "git+https://github.com/tustvold/arrow-rs.git?rev=c96274a562625f091ca4c06fca21ac35ef330358#c96274a562625f091ca4c06fca21ac35ef330358" [[package]] name = "arrow-select" -version = "34.0.0" -source = "git+https://github.com/tustvold/arrow-rs.git?rev=5f22cd4e6fa16729e946cbdfbacfa27692f35ab8#5f22cd4e6fa16729e946cbdfbacfa27692f35ab8" +version = "35.0.0" +source = "git+https://github.com/tustvold/arrow-rs.git?rev=c96274a562625f091ca4c06fca21ac35ef330358#c96274a562625f091ca4c06fca21ac35ef330358" dependencies = [ "arrow-array", "arrow-buffer", @@ -247,8 +247,8 @@ dependencies = [ [[package]] name = "arrow-string" -version = "34.0.0" -source = "git+https://github.com/tustvold/arrow-rs.git?rev=5f22cd4e6fa16729e946cbdfbacfa27692f35ab8#5f22cd4e6fa16729e946cbdfbacfa27692f35ab8" +version = "35.0.0" +source = "git+https://github.com/tustvold/arrow-rs.git?rev=c96274a562625f091ca4c06fca21ac35ef330358#c96274a562625f091ca4c06fca21ac35ef330358" dependencies = [ "arrow-array", "arrow-buffer", @@ -1768,8 +1768,8 @@ dependencies = [ [[package]] name = "parquet" -version = "34.0.0" -source = "git+https://github.com/tustvold/arrow-rs.git?rev=5f22cd4e6fa16729e946cbdfbacfa27692f35ab8#5f22cd4e6fa16729e946cbdfbacfa27692f35ab8" +version = "35.0.0" +source = "git+https://github.com/tustvold/arrow-rs.git?rev=c96274a562625f091ca4c06fca21ac35ef330358#c96274a562625f091ca4c06fca21ac35ef330358" dependencies = [ "ahash", "arrow-array", diff --git a/datafusion-cli/Cargo.toml b/datafusion-cli/Cargo.toml index 90aeeeb89fe2..c0b52c77adcd 100644 --- a/datafusion-cli/Cargo.toml +++ b/datafusion-cli/Cargo.toml @@ -29,7 +29,7 @@ rust-version = "1.62" readme = "README.md" [dependencies] -arrow = "34.0.0" +arrow = "35.0.0" async-trait = "0.1.41" clap = { version = "3", features = ["derive", "cargo"] } datafusion = { path = "../datafusion/core", version = "19.0.0" } @@ -43,7 +43,7 @@ tokio = { version = "1.24", features = ["macros", "rt", "rt-multi-thread", "sync url = "2.2" [patch.crates-io] -arrow = { git = "https://github.com/tustvold/arrow-rs.git", rev = "5f22cd4e6fa16729e946cbdfbacfa27692f35ab8" } -arrow-buffer = { git = "https://github.com/tustvold/arrow-rs.git", rev = "5f22cd4e6fa16729e946cbdfbacfa27692f35ab8" } -arrow-schema = { git = "https://github.com/tustvold/arrow-rs.git", rev = "5f22cd4e6fa16729e946cbdfbacfa27692f35ab8" } -parquet = { git = "https://github.com/tustvold/arrow-rs.git", rev = "5f22cd4e6fa16729e946cbdfbacfa27692f35ab8" } +arrow = { git = "https://github.com/tustvold/arrow-rs.git", rev = "c96274a562625f091ca4c06fca21ac35ef330358" } +arrow-buffer = { git = "https://github.com/tustvold/arrow-rs.git", rev = "c96274a562625f091ca4c06fca21ac35ef330358" } +arrow-schema = { git = "https://github.com/tustvold/arrow-rs.git", rev = "c96274a562625f091ca4c06fca21ac35ef330358" } +parquet = { git = "https://github.com/tustvold/arrow-rs.git", rev = "c96274a562625f091ca4c06fca21ac35ef330358" } diff --git a/datafusion-examples/Cargo.toml b/datafusion-examples/Cargo.toml index 613d3e934aed..4c17f87c0a13 100644 --- a/datafusion-examples/Cargo.toml +++ b/datafusion-examples/Cargo.toml @@ -36,7 +36,7 @@ required-features = ["datafusion/avro"] [dev-dependencies] arrow = { workspace = true } -arrow-flight = { version = "34.0.0", features = ["flight-sql-experimental"] } +arrow-flight = { version = "35.0.0", features = ["flight-sql-experimental"] } arrow-schema = { workspace = true } async-trait = "0.1.41" dashmap = "5.4" From a165110173d0ab9cefcf4f7bd1be7fd163afeade Mon Sep 17 00:00:00 2001 From: Raphael Taylor-Davies Date: Tue, 14 Mar 2023 12:52:39 +0000 Subject: [PATCH 4/6] Remove pin --- Cargo.toml | 7 ------ datafusion-cli/Cargo.lock | 45 ++++++++++++++++++++++++++------------- datafusion-cli/Cargo.toml | 6 ------ 3 files changed, 30 insertions(+), 28 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 374fa3ede87f..b6eadecd2418 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -68,10 +68,3 @@ opt-level = 3 overflow-checks = false panic = 'unwind' rpath = false - -[patch.crates-io] -arrow = { git = "https://github.com/tustvold/arrow-rs.git", rev = "c96274a562625f091ca4c06fca21ac35ef330358" } -arrow-flight = { git = "https://github.com/tustvold/arrow-rs.git", rev = "c96274a562625f091ca4c06fca21ac35ef330358" } -arrow-schema = { git = "https://github.com/tustvold/arrow-rs.git", rev = "c96274a562625f091ca4c06fca21ac35ef330358" } -arrow-buffer = { git = "https://github.com/tustvold/arrow-rs.git", rev = "c96274a562625f091ca4c06fca21ac35ef330358" } -parquet = { git = "https://github.com/tustvold/arrow-rs.git", rev = "c96274a562625f091ca4c06fca21ac35ef330358" } diff --git a/datafusion-cli/Cargo.lock b/datafusion-cli/Cargo.lock index f173f6a66449..fba8cacd2326 100644 --- a/datafusion-cli/Cargo.lock +++ b/datafusion-cli/Cargo.lock @@ -69,7 +69,8 @@ checksum = "8da52d66c7071e2e3fa2a1e5c6d088fec47b593032b254f5e980de8ea54454d6" [[package]] name = "arrow" version = "35.0.0" -source = "git+https://github.com/tustvold/arrow-rs.git?rev=c96274a562625f091ca4c06fca21ac35ef330358#c96274a562625f091ca4c06fca21ac35ef330358" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "73bdeeaf5bbeeb40c6e14849520b379cd22d8f605433e7c12a1d550adf8c4a06" dependencies = [ "ahash", "arrow-arith", @@ -90,7 +91,8 @@ dependencies = [ [[package]] name = "arrow-arith" version = "35.0.0" -source = "git+https://github.com/tustvold/arrow-rs.git?rev=c96274a562625f091ca4c06fca21ac35ef330358#c96274a562625f091ca4c06fca21ac35ef330358" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24a945eab89f800ab870b848a6105b464638271565d4ac80c439a349f6dae349" dependencies = [ "arrow-array", "arrow-buffer", @@ -104,7 +106,8 @@ dependencies = [ [[package]] name = "arrow-array" version = "35.0.0" -source = "git+https://github.com/tustvold/arrow-rs.git?rev=c96274a562625f091ca4c06fca21ac35ef330358#c96274a562625f091ca4c06fca21ac35ef330358" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43489bbff475545b78b0e20bde1d22abd6c99e54499839f9e815a2fa5134a51b" dependencies = [ "ahash", "arrow-buffer", @@ -119,7 +122,8 @@ dependencies = [ [[package]] name = "arrow-buffer" version = "35.0.0" -source = "git+https://github.com/tustvold/arrow-rs.git?rev=c96274a562625f091ca4c06fca21ac35ef330358#c96274a562625f091ca4c06fca21ac35ef330358" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a3759e4a52c593281184787af5435671dc8b1e78333e5a30242b2e2d6e3c9d1f" dependencies = [ "half", "num", @@ -128,7 +132,8 @@ dependencies = [ [[package]] name = "arrow-cast" version = "35.0.0" -source = "git+https://github.com/tustvold/arrow-rs.git?rev=c96274a562625f091ca4c06fca21ac35ef330358#c96274a562625f091ca4c06fca21ac35ef330358" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b30c01f06172d3e8306fcc885ee97dff55ba8d48dbc2ef5fee2e75b55b8170c4" dependencies = [ "arrow-array", "arrow-buffer", @@ -144,7 +149,8 @@ dependencies = [ [[package]] name = "arrow-csv" version = "35.0.0" -source = "git+https://github.com/tustvold/arrow-rs.git?rev=c96274a562625f091ca4c06fca21ac35ef330358#c96274a562625f091ca4c06fca21ac35ef330358" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "86059a1270d5fe268283447af53bda33f4226fbe9e3f3d109a26011dc97fdcf7" dependencies = [ "arrow-array", "arrow-buffer", @@ -162,7 +168,8 @@ dependencies = [ [[package]] name = "arrow-data" version = "35.0.0" -source = "git+https://github.com/tustvold/arrow-rs.git?rev=c96274a562625f091ca4c06fca21ac35ef330358#c96274a562625f091ca4c06fca21ac35ef330358" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19c7787c6cdbf9539b1ffb860bfc18c5848926ec3d62cbd52dc3b1ea35c874fd" dependencies = [ "arrow-buffer", "arrow-schema", @@ -173,7 +180,8 @@ dependencies = [ [[package]] name = "arrow-ipc" version = "35.0.0" -source = "git+https://github.com/tustvold/arrow-rs.git?rev=c96274a562625f091ca4c06fca21ac35ef330358#c96274a562625f091ca4c06fca21ac35ef330358" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "690167cd0ad8c4444c7bbb573066b94982acb52da554db7b7b2837c0e2dce036" dependencies = [ "arrow-array", "arrow-buffer", @@ -186,7 +194,8 @@ dependencies = [ [[package]] name = "arrow-json" version = "35.0.0" -source = "git+https://github.com/tustvold/arrow-rs.git?rev=c96274a562625f091ca4c06fca21ac35ef330358#c96274a562625f091ca4c06fca21ac35ef330358" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "28c47c78411bc7b77ab0b931b9f8acc8b1c6d7d9977e5895dbaa4632651e5824" dependencies = [ "arrow-array", "arrow-buffer", @@ -204,7 +213,8 @@ dependencies = [ [[package]] name = "arrow-ord" version = "35.0.0" -source = "git+https://github.com/tustvold/arrow-rs.git?rev=c96274a562625f091ca4c06fca21ac35ef330358#c96274a562625f091ca4c06fca21ac35ef330358" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3945e160dc92c11c4108a13251940ffce418c221eed260c53d9bec4075aa52c" dependencies = [ "arrow-array", "arrow-buffer", @@ -217,7 +227,8 @@ dependencies = [ [[package]] name = "arrow-row" version = "35.0.0" -source = "git+https://github.com/tustvold/arrow-rs.git?rev=c96274a562625f091ca4c06fca21ac35ef330358#c96274a562625f091ca4c06fca21ac35ef330358" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1951a64d60c37931ee85198e1728ad07e59cda50b9280367911c7d299dfe4bf7" dependencies = [ "ahash", "arrow-array", @@ -231,12 +242,14 @@ dependencies = [ [[package]] name = "arrow-schema" version = "35.0.0" -source = "git+https://github.com/tustvold/arrow-rs.git?rev=c96274a562625f091ca4c06fca21ac35ef330358#c96274a562625f091ca4c06fca21ac35ef330358" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bf6b26f6a6f8410e3b9531cbd1886399b99842701da77d4b4cf2013f7708f20f" [[package]] name = "arrow-select" version = "35.0.0" -source = "git+https://github.com/tustvold/arrow-rs.git?rev=c96274a562625f091ca4c06fca21ac35ef330358#c96274a562625f091ca4c06fca21ac35ef330358" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "83deb30a09afdf654d346092ef03e965f9c05d9b0753164d531f41d290d553f4" dependencies = [ "arrow-array", "arrow-buffer", @@ -248,7 +261,8 @@ dependencies = [ [[package]] name = "arrow-string" version = "35.0.0" -source = "git+https://github.com/tustvold/arrow-rs.git?rev=c96274a562625f091ca4c06fca21ac35ef330358#c96274a562625f091ca4c06fca21ac35ef330358" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b7529ba37a8bdc86cb69e00236064d122b3ac80e221270c7abc147fea63a9cee" dependencies = [ "arrow-array", "arrow-buffer", @@ -1769,7 +1783,8 @@ dependencies = [ [[package]] name = "parquet" version = "35.0.0" -source = "git+https://github.com/tustvold/arrow-rs.git?rev=c96274a562625f091ca4c06fca21ac35ef330358#c96274a562625f091ca4c06fca21ac35ef330358" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "695dc9993375343d09c61b555825da484a7e6be0ab4d3f1de9a918850ebec922" dependencies = [ "ahash", "arrow-array", diff --git a/datafusion-cli/Cargo.toml b/datafusion-cli/Cargo.toml index c0b52c77adcd..c47c36c261b5 100644 --- a/datafusion-cli/Cargo.toml +++ b/datafusion-cli/Cargo.toml @@ -41,9 +41,3 @@ parking_lot = { version = "0.12" } rustyline = "10.0" tokio = { version = "1.24", features = ["macros", "rt", "rt-multi-thread", "sync", "parking_lot"] } url = "2.2" - -[patch.crates-io] -arrow = { git = "https://github.com/tustvold/arrow-rs.git", rev = "c96274a562625f091ca4c06fca21ac35ef330358" } -arrow-buffer = { git = "https://github.com/tustvold/arrow-rs.git", rev = "c96274a562625f091ca4c06fca21ac35ef330358" } -arrow-schema = { git = "https://github.com/tustvold/arrow-rs.git", rev = "c96274a562625f091ca4c06fca21ac35ef330358" } -parquet = { git = "https://github.com/tustvold/arrow-rs.git", rev = "c96274a562625f091ca4c06fca21ac35ef330358" } From 08321a5610f9ccad978a2499398dc0a3f33d3b0e Mon Sep 17 00:00:00 2001 From: Raphael Taylor-Davies Date: Tue, 14 Mar 2023 12:53:24 +0000 Subject: [PATCH 5/6] Update regexpmatch --- datafusion/core/tests/dataframe_functions.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/datafusion/core/tests/dataframe_functions.rs b/datafusion/core/tests/dataframe_functions.rs index 8d280692e8c7..2f4e4d9d8c98 100644 --- a/datafusion/core/tests/dataframe_functions.rs +++ b/datafusion/core/tests/dataframe_functions.rs @@ -398,10 +398,10 @@ async fn test_fn_regexp_match() -> Result<()> { "+-----------------------------------+", "| regexpmatch(test.a,Utf8(\"[a-z]\")) |", "+-----------------------------------+", - "| [] |", - "| [] |", - "| [] |", - "| [] |", + "| [a] |", + "| [a] |", + "| [d] |", + "| [b] |", "+-----------------------------------+", ]; From 06deff0c12e3819841af3758f6033998cfd7483a Mon Sep 17 00:00:00 2001 From: Raphael Taylor-Davies Date: Tue, 14 Mar 2023 12:59:43 +0000 Subject: [PATCH 6/6] Fixes --- datafusion/core/tests/sql/expr.rs | 32 +++++++++---------- datafusion/core/tests/sql/set_variable.rs | 2 +- datafusion/core/tests/sql/timestamp.rs | 12 +++---- .../simplify_expressions/simplify_exprs.rs | 2 +- 4 files changed, 24 insertions(+), 24 deletions(-) diff --git a/datafusion/core/tests/sql/expr.rs b/datafusion/core/tests/sql/expr.rs index 3d5f134a8783..7a0be72052aa 100644 --- a/datafusion/core/tests/sql/expr.rs +++ b/datafusion/core/tests/sql/expr.rs @@ -1279,36 +1279,36 @@ async fn test_extract_date_part() -> Result<()> { "12.0" ); test_expression!( - "EXTRACT(second FROM to_timestamp('2020-09-08T12:00:12.12345678+00:00'))", - "12.12345678" + "EXTRACT(second FROM to_timestamp('2020-09-08T12:00:12.123456789+00:00'))", + "12.123456789" ); test_expression!( - "EXTRACT(millisecond FROM to_timestamp('2020-09-08T12:00:12.12345678+00:00'))", - "12123.45678" + "EXTRACT(millisecond FROM to_timestamp('2020-09-08T12:00:12.123456789+00:00'))", + "12123.456789" ); test_expression!( - "EXTRACT(microsecond FROM to_timestamp('2020-09-08T12:00:12.12345678+00:00'))", - "12123456.78" + "EXTRACT(microsecond FROM to_timestamp('2020-09-08T12:00:12.123456789+00:00'))", + "12123456.789" ); test_expression!( - "EXTRACT(nanosecond FROM to_timestamp('2020-09-08T12:00:12.12345678+00:00'))", - "1.212345678e10" + "EXTRACT(nanosecond FROM to_timestamp('2020-09-08T12:00:12.123456789+00:00'))", + "1.2123456789e10" ); test_expression!( - "date_part('second', to_timestamp('2020-09-08T12:00:12.12345678+00:00'))", - "12.12345678" + "date_part('second', to_timestamp('2020-09-08T12:00:12.123456789+00:00'))", + "12.123456789" ); test_expression!( - "date_part('millisecond', to_timestamp('2020-09-08T12:00:12.12345678+00:00'))", - "12123.45678" + "date_part('millisecond', to_timestamp('2020-09-08T12:00:12.123456789+00:00'))", + "12123.456789" ); test_expression!( - "date_part('microsecond', to_timestamp('2020-09-08T12:00:12.12345678+00:00'))", - "12123456.78" + "date_part('microsecond', to_timestamp('2020-09-08T12:00:12.123456789+00:00'))", + "12123456.789" ); test_expression!( - "date_part('nanosecond', to_timestamp('2020-09-08T12:00:12.12345678+00:00'))", - "1.212345678e10" + "date_part('nanosecond', to_timestamp('2020-09-08T12:00:12.123456789+00:00'))", + "1.2123456789e10" ); Ok(()) } diff --git a/datafusion/core/tests/sql/set_variable.rs b/datafusion/core/tests/sql/set_variable.rs index b7161eb2b162..258d4cb3660a 100644 --- a/datafusion/core/tests/sql/set_variable.rs +++ b/datafusion/core/tests/sql/set_variable.rs @@ -414,7 +414,7 @@ async fn set_time_zone_bad_time_zone_format() { .await .unwrap(); let err = pretty_format_batches(&result).err().unwrap().to_string(); - assert_eq!(err, "Parser error: Invalid timezone \"+08:00:00\": Expected format [+-]XX:XX, [+-]XX, or [+-]XXXX"); + assert_eq!(err, "Parser error: Invalid timezone \"+08:00:00\": only offset based timezones supported without chrono-tz feature"); plan_and_collect(&ctx, "SET TIME ZONE = '08:00'") .await diff --git a/datafusion/core/tests/sql/timestamp.rs b/datafusion/core/tests/sql/timestamp.rs index 128ee1639e3f..9f7dffec9ced 100644 --- a/datafusion/core/tests/sql/timestamp.rs +++ b/datafusion/core/tests/sql/timestamp.rs @@ -1411,14 +1411,14 @@ async fn cast_timestamp_before_1970() -> Result<()> { assert_batches_eq!(expected, &actual); - let sql = "select cast('1969-01-01T00:00:00.1Z' as timestamp);"; + let sql = "select cast('1969-01-01T00:00:00.100Z' as timestamp);"; let actual = execute_to_batches(&ctx, sql).await; let expected = vec![ - "+--------------------------------+", - "| Utf8(\"1969-01-01T00:00:00.1Z\") |", - "+--------------------------------+", - "| 1969-01-01T00:00:00.100 |", - "+--------------------------------+", + "+----------------------------------+", + "| Utf8(\"1969-01-01T00:00:00.100Z\") |", + "+----------------------------------+", + "| 1969-01-01T00:00:00.100 |", + "+----------------------------------+", ]; assert_batches_eq!(expected, &actual); diff --git a/datafusion/optimizer/src/simplify_expressions/simplify_exprs.rs b/datafusion/optimizer/src/simplify_expressions/simplify_exprs.rs index 1e9f902f90a8..1aa50fff8110 100644 --- a/datafusion/optimizer/src/simplify_expressions/simplify_exprs.rs +++ b/datafusion/optimizer/src/simplify_expressions/simplify_exprs.rs @@ -431,7 +431,7 @@ mod tests { .project(proj)? .build()?; - let expected = "Error parsing 'I'M NOT A TIMESTAMP' as timestamp"; + let expected = "Error parsing timestamp from 'I'M NOT A TIMESTAMP'"; let actual = get_optimized_plan_err(&plan, &Utc::now()); assert_contains!(actual, expected); Ok(())