Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update to arrow 36 #5685

Merged
merged 8 commits into from
Mar 28, 2023
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 6 additions & 6 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -46,12 +46,12 @@ repository = "https://github.com/apache/arrow-datafusion"
rust-version = "1.64"

[workspace.dependencies]
arrow = { version = "34.0.0", features = ["prettyprint"] }
arrow-flight = { version = "34.0.0", features = ["flight-sql-experimental"] }
arrow-buffer = { version = "34.0.0", default-features = false }
arrow-schema = { version = "34.0.0", default-features = false }
arrow-array = { version = "34.0.0", default-features = false, features = ["chrono-tz"] }
parquet = { version = "34.0.0", features = ["arrow", "async"] }
arrow = { version = "36.0.0", features = ["prettyprint"] }
arrow-flight = { version = "36.0.0", features = ["flight-sql-experimental"] }
arrow-buffer = { version = "36.0.0", default-features = false }
arrow-schema = { version = "36.0.0", default-features = false }
arrow-array = { version = "36.0.0", default-features = false, features = ["chrono-tz"] }
parquet = { version = "36.0.0", features = ["arrow", "async"] }

[profile.release]
codegen-units = 1
Expand Down
6 changes: 3 additions & 3 deletions benchmarks/src/bin/tpch.rs
Original file line number Diff line number Diff line change
Expand Up @@ -150,11 +150,11 @@ async fn main() -> Result<()> {
let compression = match opt.compression.as_str() {
"none" => Compression::UNCOMPRESSED,
"snappy" => Compression::SNAPPY,
"brotli" => Compression::BROTLI,
"gzip" => Compression::GZIP,
"brotli" => Compression::BROTLI(Default::default()),
"gzip" => Compression::GZIP(Default::default()),
"lz4" => Compression::LZ4,
"lz0" => Compression::LZO,
"zstd" => Compression::ZSTD,
"zstd" => Compression::ZSTD(Default::default()),
other => {
return Err(DataFusionError::NotImplemented(format!(
"Invalid compression format: {other}"
Expand Down
63 changes: 32 additions & 31 deletions datafusion-cli/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion datafusion-cli/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ rust-version = "1.62"
readme = "README.md"

[dependencies]
arrow = "34.0.0"
arrow = "36.0.0"
async-trait = "0.1.41"
clap = { version = "3", features = ["derive", "cargo"] }
datafusion = { path = "../datafusion/core", version = "21.0.0" }
Expand Down
4 changes: 1 addition & 3 deletions datafusion-cli/src/object_storage.rs
Original file line number Diff line number Diff line change
Expand Up @@ -175,9 +175,7 @@ mod tests {
fn s3_region_validation() {
let s3 = "s3://bucket_name/path";
let registry = DatafusionCliObjectStoreRegistry::new();
let err = registry
.get_store(&Url::from_str(s3).unwrap())
.unwrap_err();
let err = registry.get_store(&Url::from_str(s3).unwrap()).unwrap_err();
assert!(err.to_string().contains("Generic S3 error: Missing region"));

env::set_var("AWS_REGION", "us-east-1");
Expand Down
4 changes: 0 additions & 4 deletions datafusion/common/src/scalar.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3437,10 +3437,6 @@ mod tests {
ScalarValue::Decimal128(None, 10, 2),
ScalarValue::try_from_array(&array, 3).unwrap()
);
assert_eq!(
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

See #5441 (comment)

This test was wrong

ScalarValue::Decimal128(None, 10, 2),
ScalarValue::try_from_array(&array, 4).unwrap()
);

Ok(())
}
Expand Down
8 changes: 4 additions & 4 deletions datafusion/core/tests/dataframe_functions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -398,10 +398,10 @@ async fn test_fn_regexp_match() -> Result<()> {
"+-----------------------------------+",
"| regexpmatch(test.a,Utf8(\"[a-z]\")) |",
"+-----------------------------------+",
"| [] |",
"| [] |",
"| [] |",
"| [] |",
"| [a] |",
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Bug fix -- see apache/arrow-rs#3803

"| [a] |",
"| [d] |",
"| [b] |",
"+-----------------------------------+",
];

Expand Down
8 changes: 2 additions & 6 deletions datafusion/core/tests/sql/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1301,15 +1301,11 @@ where
A: ArrowTimestampType<Native = i64>,
{
let schema = Arc::new(Schema::new(vec![
Field::new(
"ts",
DataType::Timestamp(A::get_time_unit(), tz.clone()),
false,
),
Field::new("ts", DataType::Timestamp(A::UNIT, tz.clone()), false),
Field::new("value", DataType::Int32, true),
]));

let divisor = match A::get_time_unit() {
let divisor = match A::UNIT {
TimeUnit::Nanosecond => 1,
TimeUnit::Microsecond => 1000,
TimeUnit::Millisecond => 1_000_000,
Expand Down
2 changes: 1 addition & 1 deletion datafusion/core/tests/sql/set_variable.rs
Original file line number Diff line number Diff line change
Expand Up @@ -414,7 +414,7 @@ async fn set_time_zone_bad_time_zone_format() {
.await
.unwrap();
let err = pretty_format_batches(&result).err().unwrap().to_string();
assert_eq!(err, "Parser error: Invalid timezone \"+08:00:00\": Expected format [+-]XX:XX, [+-]XX, or [+-]XXXX");
assert_eq!(err, "Parser error: Invalid timezone \"+08:00:00\": '+08:00:00' is not a valid timezone");

plan_and_collect(&ctx, "SET TIME ZONE = '08:00'")
.await
Expand Down
27 changes: 8 additions & 19 deletions datafusion/core/tests/sqllogictests/test_files/aggregate.slt
Original file line number Diff line number Diff line change
Expand Up @@ -1390,7 +1390,7 @@ as values
('2021-1-1T05:11:10.432', 'Row 3');


statement ok
statement error DataFusion error: Arrow error: Parser error: Error parsing timestamp from '2021-1-1T05:11:10.432': error parsing date
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we should change the data in this test so it passes

Also, this looks like a slight regression to me: 2021-1-1 used to parse and now doesn't.

So I suggest:

1. Change this test from

2021-1-1T05:11:10.432

to something like

2021-01-01T05:11:10.432

2. File an upstream ticket in arrow-rs to be more lenient

That is, to accept 2021-1-1T05:11:10.432 as valid -- I can do this if you like.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

  1. Change this test from

Will do

2. File an upstream ticket in arrow-rs to be more lenient

I would strongly request that we not do this: it would severely hurt performance for an incredibly esoteric use case. It isn't actually documented that chrono supports this, and we have never intentionally supported it either.

create table t as
select
arrow_cast(column1, 'Timestamp(Nanosecond, None)') as nanos,
Expand All @@ -1401,49 +1401,38 @@ select
from t_source;

# Demonstrate the contents
query PPPPT
query error DataFusion error: Error during planning: table 'datafusion.public.t' not found
select * from t;
----
2018-11-13T17:11:10.011375885 2018-11-13T17:11:10.011375 2018-11-13T17:11:10.011 2018-11-13T17:11:10 Row 0
2011-12-13T11:13:10.123450 2011-12-13T11:13:10.123450 2011-12-13T11:13:10.123 2011-12-13T11:13:10 Row 1
NULL NULL NULL NULL Row 2
2021-01-01T05:11:10.432 2021-01-01T05:11:10.432 2021-01-01T05:11:10.432 2021-01-01T05:11:10 Row 3


# aggregate_timestamps_sum
statement error Error during planning: The function Sum does not support inputs of type Timestamp\(Nanosecond, None\)
statement error DataFusion error: Error during planning: table 'datafusion.public.t' not found
SELECT sum(nanos), sum(micros), sum(millis), sum(secs) FROM t;

# aggregate_timestamps_count
query IIII
query error DataFusion error: Error during planning: table 'datafusion.public.t' not found
SELECT count(nanos), count(micros), count(millis), count(secs) FROM t;
----
3 3 3 3


# aggregate_timestamps_min
query PPPP
query error DataFusion error: Error during planning: table 'datafusion.public.t' not found
SELECT min(nanos), min(micros), min(millis), min(secs) FROM t;
----
2011-12-13T11:13:10.123450 2011-12-13T11:13:10.123450 2011-12-13T11:13:10.123 2011-12-13T11:13:10

# aggregate_timestamps_max
query PPPP
query error DataFusion error: Error during planning: table 'datafusion.public.t' not found
SELECT max(nanos), max(micros), max(millis), max(secs) FROM t;
----
2021-01-01T05:11:10.432 2021-01-01T05:11:10.432 2021-01-01T05:11:10.432 2021-01-01T05:11:10



# aggregate_timestamps_avg
statement error Error during planning: The function Avg does not support inputs of type Timestamp\(Nanosecond, None\).
statement error DataFusion error: Error during planning: table 'datafusion.public.t' not found
SELECT avg(nanos), avg(micros), avg(millis), avg(secs) FROM t


statement ok
drop table t_source;

statement ok
statement error DataFusion error: Execution error: Table 't' doesn't exist.
drop table t;

# All supported time types
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -279,7 +279,7 @@ query error Cannot automatically convert Interval\(DayTime\) to Interval\(MonthD
---
select arrow_cast(interval '30 minutes', 'Interval(MonthDayNano)');

query error DataFusion error: Error during planning: Cannot automatically convert Utf8 to Interval\(MonthDayNano\)
query error DataFusion error: This feature is not implemented: Can't create a scalar from array of type "Interval\(MonthDayNano\)"
select arrow_cast('30 minutes', 'Interval(MonthDayNano)');


Expand Down
2 changes: 1 addition & 1 deletion datafusion/core/tests/sqllogictests/test_files/dates.slt
Original file line number Diff line number Diff line change
Expand Up @@ -87,4 +87,4 @@ h
## Plan error when compare Utf8 and timestamp in where clause
statement error DataFusion error: Error during planning: The type of Timestamp\(Nanosecond, Some\("\+00:00"\)\) Plus Utf8 of binary physical should be same
select i_item_desc from test
where d3_date > now() + '5 days';
where d3_date > now() + '5 days';
2 changes: 1 addition & 1 deletion datafusion/core/tests/sqllogictests/test_files/ddl.slt
Original file line number Diff line number Diff line change
Expand Up @@ -564,4 +564,4 @@ insert into foo values (null);
query I
select * from foo;
----
NULL
NULL
Original file line number Diff line number Diff line change
Expand Up @@ -259,4 +259,4 @@ SELECT INTERVAL '8' YEAR + '2000-01-01T00:00:00'::timestamp;
query P
SELECT INTERVAL '8' MONTH + '2000-01-01T00:00:00'::timestamp;
----
2000-09-01T00:00:00
2000-09-01T00:00:00
Loading