Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update to arrow 36 #5685

Merged
merged 8 commits into from
Mar 28, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 6 additions & 6 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -46,12 +46,12 @@ repository = "https://github.com/apache/arrow-datafusion"
rust-version = "1.64"

[workspace.dependencies]
arrow = { version = "34.0.0", features = ["prettyprint"] }
arrow-flight = { version = "34.0.0", features = ["flight-sql-experimental"] }
arrow-buffer = { version = "34.0.0", default-features = false }
arrow-schema = { version = "34.0.0", default-features = false }
arrow-array = { version = "34.0.0", default-features = false, features = ["chrono-tz"] }
parquet = { version = "34.0.0", features = ["arrow", "async"] }
arrow = { version = "36.0.0", features = ["prettyprint"] }
arrow-flight = { version = "36.0.0", features = ["flight-sql-experimental"] }
arrow-buffer = { version = "36.0.0", default-features = false }
arrow-schema = { version = "36.0.0", default-features = false }
arrow-array = { version = "36.0.0", default-features = false, features = ["chrono-tz"] }
parquet = { version = "36.0.0", features = ["arrow", "async"] }

[profile.release]
codegen-units = 1
Expand Down
6 changes: 3 additions & 3 deletions benchmarks/src/bin/tpch.rs
Original file line number Diff line number Diff line change
Expand Up @@ -150,11 +150,11 @@ async fn main() -> Result<()> {
let compression = match opt.compression.as_str() {
"none" => Compression::UNCOMPRESSED,
"snappy" => Compression::SNAPPY,
"brotli" => Compression::BROTLI,
"gzip" => Compression::GZIP,
"brotli" => Compression::BROTLI(Default::default()),
"gzip" => Compression::GZIP(Default::default()),
"lz4" => Compression::LZ4,
"lz0" => Compression::LZO,
"zstd" => Compression::ZSTD,
"zstd" => Compression::ZSTD(Default::default()),
other => {
return Err(DataFusionError::NotImplemented(format!(
"Invalid compression format: {other}"
Expand Down
63 changes: 32 additions & 31 deletions datafusion-cli/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion datafusion-cli/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ rust-version = "1.62"
readme = "README.md"

[dependencies]
arrow = "34.0.0"
arrow = "36.0.0"
async-trait = "0.1.41"
clap = { version = "3", features = ["derive", "cargo"] }
datafusion = { path = "../datafusion/core", version = "21.0.0" }
Expand Down
4 changes: 1 addition & 3 deletions datafusion-cli/src/object_storage.rs
Original file line number Diff line number Diff line change
Expand Up @@ -175,9 +175,7 @@ mod tests {
fn s3_region_validation() {
let s3 = "s3://bucket_name/path";
let registry = DatafusionCliObjectStoreRegistry::new();
let err = registry
.get_store(&Url::from_str(s3).unwrap())
.unwrap_err();
let err = registry.get_store(&Url::from_str(s3).unwrap()).unwrap_err();
assert!(err.to_string().contains("Generic S3 error: Missing region"));

env::set_var("AWS_REGION", "us-east-1");
Expand Down
4 changes: 0 additions & 4 deletions datafusion/common/src/scalar.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3437,10 +3437,6 @@ mod tests {
ScalarValue::Decimal128(None, 10, 2),
ScalarValue::try_from_array(&array, 3).unwrap()
);
assert_eq!(
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

See #5441 (comment)

This test was wrong

ScalarValue::Decimal128(None, 10, 2),
ScalarValue::try_from_array(&array, 4).unwrap()
);

Ok(())
}
Expand Down
8 changes: 4 additions & 4 deletions datafusion/core/tests/dataframe_functions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -398,10 +398,10 @@ async fn test_fn_regexp_match() -> Result<()> {
"+-----------------------------------+",
"| regexpmatch(test.a,Utf8(\"[a-z]\")) |",
"+-----------------------------------+",
"| [] |",
"| [] |",
"| [] |",
"| [] |",
"| [a] |",
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Bug fix -- see apache/arrow-rs#3803

"| [a] |",
"| [d] |",
"| [b] |",
"+-----------------------------------+",
];

Expand Down
8 changes: 2 additions & 6 deletions datafusion/core/tests/sql/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1301,15 +1301,11 @@ where
A: ArrowTimestampType<Native = i64>,
{
let schema = Arc::new(Schema::new(vec![
Field::new(
"ts",
DataType::Timestamp(A::get_time_unit(), tz.clone()),
false,
),
Field::new("ts", DataType::Timestamp(A::UNIT, tz.clone()), false),
Field::new("value", DataType::Int32, true),
]));

let divisor = match A::get_time_unit() {
let divisor = match A::UNIT {
TimeUnit::Nanosecond => 1,
TimeUnit::Microsecond => 1000,
TimeUnit::Millisecond => 1_000_000,
Expand Down
2 changes: 1 addition & 1 deletion datafusion/core/tests/sql/set_variable.rs
Original file line number Diff line number Diff line change
Expand Up @@ -414,7 +414,7 @@ async fn set_time_zone_bad_time_zone_format() {
.await
.unwrap();
let err = pretty_format_batches(&result).err().unwrap().to_string();
assert_eq!(err, "Parser error: Invalid timezone \"+08:00:00\": Expected format [+-]XX:XX, [+-]XX, or [+-]XXXX");
assert_eq!(err, "Parser error: Invalid timezone \"+08:00:00\": '+08:00:00' is not a valid timezone");

plan_and_collect(&ctx, "SET TIME ZONE = '08:00'")
.await
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1387,7 +1387,7 @@ as values
('2018-11-13T17:11:10.011375885995', 'Row 0'),
('2011-12-13T11:13:10.12345', 'Row 1'),
(null, 'Row 2'),
('2021-1-1T05:11:10.432', 'Row 3');
('2021-01-01T05:11:10.432', 'Row 3');
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Arrow no longer supports timestamps of this shortened form, it has never been documented to support them, and it is an accidental undocumented quirk of chrono that it did. I would very strongly resist changing this, being able to predict the digit locations ahead of time is critical to avoiding a data dependency when parsing.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Filed apache/arrow-rs#3969 to track (I don't think we necessarily need to change arrow to support these timestamps, but it will reduce confusion to have the change documented)



statement ok
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -279,7 +279,7 @@ query error Cannot automatically convert Interval\(DayTime\) to Interval\(MonthD
---
select arrow_cast(interval '30 minutes', 'Interval(MonthDayNano)');

query error DataFusion error: Error during planning: Cannot automatically convert Utf8 to Interval\(MonthDayNano\)
query error DataFusion error: This feature is not implemented: Can't create a scalar from array of type "Interval\(MonthDayNano\)"
select arrow_cast('30 minutes', 'Interval(MonthDayNano)');


Expand Down
2 changes: 1 addition & 1 deletion datafusion/core/tests/sqllogictests/test_files/dates.slt
Original file line number Diff line number Diff line change
Expand Up @@ -87,4 +87,4 @@ h
## Plan error when compare Utf8 and timestamp in where clause
statement error DataFusion error: Error during planning: The type of Timestamp\(Nanosecond, Some\("\+00:00"\)\) Plus Utf8 of binary physical should be same
select i_item_desc from test
where d3_date > now() + '5 days';
where d3_date > now() + '5 days';
2 changes: 1 addition & 1 deletion datafusion/core/tests/sqllogictests/test_files/ddl.slt
Original file line number Diff line number Diff line change
Expand Up @@ -564,4 +564,4 @@ insert into foo values (null);
query I
select * from foo;
----
NULL
NULL
Original file line number Diff line number Diff line change
Expand Up @@ -259,4 +259,4 @@ SELECT INTERVAL '8' YEAR + '2000-01-01T00:00:00'::timestamp;
query P
SELECT INTERVAL '8' MONTH + '2000-01-01T00:00:00'::timestamp;
----
2000-09-01T00:00:00
2000-09-01T00:00:00
1 change: 0 additions & 1 deletion datafusion/core/tests/sqllogictests/test_files/window.slt
Original file line number Diff line number Diff line change
Expand Up @@ -2073,4 +2073,3 @@ SELECT SUM(c12) OVER(ORDER BY c1, c2 GROUPS BETWEEN 1 PRECEDING AND 1 FOLLOWING)
2.994840293343 NULL
9.674390599321 NULL
7.728066219895 NULL

Original file line number Diff line number Diff line change
Expand Up @@ -431,7 +431,8 @@ mod tests {
.project(proj)?
.build()?;

let expected = "Error parsing 'I'M NOT A TIMESTAMP' as timestamp";
let expected =
"Error parsing timestamp from 'I'M NOT A TIMESTAMP': error parsing date";
let actual = get_optimized_plan_err(&plan, &Utc::now());
assert_contains!(actual, expected);
Ok(())
Expand Down
14 changes: 7 additions & 7 deletions datafusion/physical-expr/src/expressions/in_list.rs
Original file line number Diff line number Diff line change
Expand Up @@ -144,11 +144,9 @@ where
T: ArrayAccessor,
T::Item: PartialEq + HashValue,
{
let data = array.data();

let state = RandomState::new();
let mut map: HashMap<usize, (), ()> =
HashMap::with_capacity_and_hasher(data.len(), ());
HashMap::with_capacity_and_hasher(array.len(), ());

let insert_value = |idx| {
let value = array.value(idx);
Expand All @@ -161,10 +159,12 @@ where
}
};

match data.null_buffer() {
Some(buffer) => BitIndexIterator::new(buffer.as_ref(), data.offset(), data.len())
.for_each(insert_value),
None => (0..data.len()).for_each(insert_value),
match array.nulls() {
Some(nulls) => {
BitIndexIterator::new(nulls.validity(), nulls.offset(), nulls.len())
.for_each(insert_value)
}
None => (0..array.len()).for_each(insert_value),
}

ArrayHashSet { state, map }
Expand Down
Loading