diff --git a/datafusion/functions/src/datetime/date_part.rs b/datafusion/functions/src/datetime/date_part.rs index 01e094bc4e0b..98c6b659a824 100644 --- a/datafusion/functions/src/datetime/date_part.rs +++ b/datafusion/functions/src/datetime/date_part.rs @@ -19,12 +19,11 @@ use std::any::Any; use std::str::FromStr; use std::sync::{Arc, OnceLock}; -use arrow::array::{Array, ArrayRef, Float64Array}; +use arrow::array::{Array, ArrayRef, Float64Array, Int32Array}; use arrow::compute::kernels::cast_utils::IntervalUnit; -use arrow::compute::{binary, cast, date_part, DatePart}; +use arrow::compute::{binary, date_part, DatePart}; use arrow::datatypes::DataType::{ - Date32, Date64, Duration, Float64, Interval, Time32, Time64, Timestamp, Utf8, - Utf8View, + Date32, Date64, Duration, Interval, Time32, Time64, Timestamp, Utf8, Utf8View, }; use arrow::datatypes::IntervalUnit::{DayTime, MonthDayNano, YearMonth}; use arrow::datatypes::TimeUnit::{Microsecond, Millisecond, Nanosecond, Second}; @@ -36,11 +35,12 @@ use datafusion_common::cast::{ as_timestamp_microsecond_array, as_timestamp_millisecond_array, as_timestamp_nanosecond_array, as_timestamp_second_array, }; -use datafusion_common::{exec_err, Result, ScalarValue}; +use datafusion_common::{exec_err, internal_err, ExprSchema, Result, ScalarValue}; use datafusion_expr::scalar_doc_sections::DOC_SECTION_DATETIME; use datafusion_expr::TypeSignature::Exact; use datafusion_expr::{ - ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, TIMEZONE_WILDCARD, + ColumnarValue, Documentation, Expr, ScalarUDFImpl, Signature, Volatility, + TIMEZONE_WILDCARD, }; #[derive(Debug)] @@ -148,7 +148,21 @@ impl ScalarUDFImpl for DatePartFunc { } fn return_type(&self, _arg_types: &[DataType]) -> Result { - Ok(Float64) + internal_err!("return_type_from_exprs shoud be called instead") + } + + fn return_type_from_exprs( + &self, + args: &[Expr], + _schema: &dyn ExprSchema, + _arg_types: &[DataType], + ) -> Result { + match &args[0] { + Expr::Literal(ScalarValue::Utf8(Some(part))) if is_epoch(part) => { + Ok(DataType::Float64) + } + _ => Ok(DataType::Int32), + } } fn invoke(&self, args: &[ColumnarValue]) -> Result { @@ -174,35 +188,31 @@ impl ScalarUDFImpl for DatePartFunc { ColumnarValue::Scalar(scalar) => scalar.to_array()?, }; - // to remove quotes at most 2 characters - let part_trim = part.trim_matches(|c| c == '\'' || c == '\"'); - if ![2, 0].contains(&(part.len() - part_trim.len())) { - return exec_err!("Date part '{part}' not supported"); - } + let part_trim = part_normalization(part); // using IntervalUnit here means we hand off all the work of supporting plurals (like "seconds") // and synonyms ( like "ms,msec,msecond,millisecond") to Arrow let arr = if let Ok(interval_unit) = IntervalUnit::from_str(part_trim) { match interval_unit { - IntervalUnit::Year => date_part_f64(array.as_ref(), DatePart::Year)?, - IntervalUnit::Month => date_part_f64(array.as_ref(), DatePart::Month)?, - IntervalUnit::Week => date_part_f64(array.as_ref(), DatePart::Week)?, - IntervalUnit::Day => date_part_f64(array.as_ref(), DatePart::Day)?, - IntervalUnit::Hour => date_part_f64(array.as_ref(), DatePart::Hour)?, - IntervalUnit::Minute => date_part_f64(array.as_ref(), DatePart::Minute)?, - IntervalUnit::Second => seconds(array.as_ref(), Second)?, - IntervalUnit::Millisecond => seconds(array.as_ref(), Millisecond)?, - IntervalUnit::Microsecond => seconds(array.as_ref(), Microsecond)?, - IntervalUnit::Nanosecond => seconds(array.as_ref(), Nanosecond)?, + IntervalUnit::Year => date_part(array.as_ref(), DatePart::Year)?, + IntervalUnit::Month => date_part(array.as_ref(), DatePart::Month)?, + IntervalUnit::Week => date_part(array.as_ref(), DatePart::Week)?, + IntervalUnit::Day => date_part(array.as_ref(), DatePart::Day)?, + IntervalUnit::Hour => date_part(array.as_ref(), DatePart::Hour)?, + IntervalUnit::Minute => date_part(array.as_ref(), DatePart::Minute)?, + IntervalUnit::Second => seconds_as_i32(array.as_ref(), Second)?, + IntervalUnit::Millisecond => seconds_as_i32(array.as_ref(), Millisecond)?, + IntervalUnit::Microsecond => seconds_as_i32(array.as_ref(), Microsecond)?, + IntervalUnit::Nanosecond => seconds_as_i32(array.as_ref(), Nanosecond)?, // century and decade are not supported by `DatePart`, although they are supported in postgres _ => return exec_err!("Date part '{part}' not supported"), } } else { // special cases that can be extracted (in postgres) but are not interval units match part_trim.to_lowercase().as_str() { - "qtr" | "quarter" => date_part_f64(array.as_ref(), DatePart::Quarter)?, - "doy" => date_part_f64(array.as_ref(), DatePart::DayOfYear)?, - "dow" => date_part_f64(array.as_ref(), DatePart::DayOfWeekSunday0)?, + "qtr" | "quarter" => date_part(array.as_ref(), DatePart::Quarter)?, + "doy" => date_part(array.as_ref(), DatePart::DayOfYear)?, + "dow" => date_part(array.as_ref(), DatePart::DayOfWeekSunday0)?, "epoch" => epoch(array.as_ref())?, _ => return exec_err!("Date part '{part}' not supported"), } @@ -223,6 +233,18 @@ impl ScalarUDFImpl for DatePartFunc { } } +fn is_epoch(part: &str) -> bool { + let part = part_normalization(part); + matches!(part.to_lowercase().as_str(), "epoch") +} + +// Try to remove quote if exist, if the quote is invalid, return original string and let the downstream function handle the error +fn part_normalization(part: &str) -> &str { + part.strip_prefix(|c| c == '\'' || c == '\"') + .and_then(|s| s.strip_suffix(|c| c == '\'' || c == '\"')) + .unwrap_or(part) +} + static DOCUMENTATION: OnceLock = OnceLock::new(); fn get_date_part_doc() -> &'static Documentation { @@ -261,14 +283,63 @@ fn get_date_part_doc() -> &'static Documentation { }) } -/// Invoke [`date_part`] and cast the result to Float64 -fn date_part_f64(array: &dyn Array, part: DatePart) -> Result { - Ok(cast(date_part(array, part)?.as_ref(), &Float64)?) +/// Invoke [`date_part`] on an `array` (e.g. Timestamp) and convert the +/// result to a total number of seconds, milliseconds, microseconds or +/// nanoseconds +fn seconds_as_i32(array: &dyn Array, unit: TimeUnit) -> Result { + // Nanosecond is neither supported in Postgres nor DuckDB, to avoid to deal with overflow and precision issue we don't support nanosecond + if unit == Nanosecond { + return internal_err!("unit {unit:?} not supported"); + } + + let conversion_factor = match unit { + Second => 1_000_000_000, + Millisecond => 1_000_000, + Microsecond => 1_000, + Nanosecond => 1, + }; + + let second_factor = match unit { + Second => 1, + Millisecond => 1_000, + Microsecond => 1_000_000, + Nanosecond => 1_000_000_000, + }; + + let secs = date_part(array, DatePart::Second)?; + // This assumes array is primitive and not a dictionary + let secs = as_int32_array(secs.as_ref())?; + let subsecs = date_part(array, DatePart::Nanosecond)?; + let subsecs = as_int32_array(subsecs.as_ref())?; + + // Special case where there are no nulls. + if subsecs.null_count() == 0 { + let r: Int32Array = binary(secs, subsecs, |secs, subsecs| { + secs * second_factor + (subsecs % 1_000_000_000) / conversion_factor + })?; + Ok(Arc::new(r)) + } else { + // Nulls in secs are preserved, nulls in subsecs are treated as zero to account for the case + // where the number of nanoseconds overflows. + let r: Int32Array = secs + .iter() + .zip(subsecs) + .map(|(secs, subsecs)| { + secs.map(|secs| { + let subsecs = subsecs.unwrap_or(0); + secs * second_factor + (subsecs % 1_000_000_000) / conversion_factor + }) + }) + .collect(); + Ok(Arc::new(r)) + } } /// Invoke [`date_part`] on an `array` (e.g. Timestamp) and convert the /// result to a total number of seconds, milliseconds, microseconds or /// nanoseconds +/// +/// Given epoch return f64, this is a duplicated function to optimize for f64 type fn seconds(array: &dyn Array, unit: TimeUnit) -> Result { let sf = match unit { Second => 1_f64, diff --git a/datafusion/sqllogictest/test_files/clickbench.slt b/datafusion/sqllogictest/test_files/clickbench.slt index 733c0a3cd972..dfcd92475857 100644 --- a/datafusion/sqllogictest/test_files/clickbench.slt +++ b/datafusion/sqllogictest/test_files/clickbench.slt @@ -136,7 +136,7 @@ SELECT "UserID", "SearchPhrase", COUNT(*) FROM hits GROUP BY "UserID", "SearchPh 519640690937130534 (empty) 2 7418527520126366595 (empty) 1 -query IRTI rowsort +query IITI rowsort SELECT "UserID", extract(minute FROM to_timestamp_seconds("EventTime")) AS m, "SearchPhrase", COUNT(*) FROM hits GROUP BY "UserID", m, "SearchPhrase" ORDER BY COUNT(*) DESC LIMIT 10; ---- -2461439046089301801 18 (empty) 1 diff --git a/datafusion/sqllogictest/test_files/expr.slt b/datafusion/sqllogictest/test_files/expr.slt index 31467072dd3e..499d279515c3 100644 --- a/datafusion/sqllogictest/test_files/expr.slt +++ b/datafusion/sqllogictest/test_files/expr.slt @@ -840,349 +840,340 @@ SELECT EXTRACT("'''year'''" FROM timestamp '2020-09-08T12:00:00+00:00') query error SELECT EXTRACT("'year'" FROM timestamp '2020-09-08T12:00:00+00:00') -query R +query I SELECT date_part('YEAR', CAST('2000-01-01' AS DATE)) ---- 2000 -query R +query I SELECT EXTRACT(year FROM timestamp '2020-09-08T12:00:00+00:00') ---- 2020 -query R +query I SELECT EXTRACT("year" FROM timestamp '2020-09-08T12:00:00+00:00') ---- 2020 -query R +query I SELECT EXTRACT('year' FROM timestamp '2020-09-08T12:00:00+00:00') ---- 2020 -query R +query I SELECT date_part('QUARTER', CAST('2000-01-01' AS DATE)) ---- 1 -query R +query I SELECT EXTRACT(quarter FROM to_timestamp('2020-09-08T12:00:00+00:00')) ---- 3 -query R +query I SELECT EXTRACT("quarter" FROM to_timestamp('2020-09-08T12:00:00+00:00')) ---- 3 -query R +query I SELECT EXTRACT('quarter' FROM to_timestamp('2020-09-08T12:00:00+00:00')) ---- 3 -query R +query I SELECT date_part('MONTH', CAST('2000-01-01' AS DATE)) ---- 1 -query R +query I SELECT EXTRACT(month FROM to_timestamp('2020-09-08T12:00:00+00:00')) ---- 9 -query R +query I SELECT EXTRACT("month" FROM to_timestamp('2020-09-08T12:00:00+00:00')) ---- 9 -query R +query I SELECT EXTRACT('month' FROM to_timestamp('2020-09-08T12:00:00+00:00')) ---- 9 -query R +query I SELECT date_part('WEEK', CAST('2003-01-01' AS DATE)) ---- 1 -query R +query I SELECT EXTRACT(WEEK FROM to_timestamp('2020-09-08T12:00:00+00:00')) ---- 37 -query R +query I SELECT EXTRACT("WEEK" FROM to_timestamp('2020-09-08T12:00:00+00:00')) ---- 37 -query R +query I SELECT EXTRACT('WEEK' FROM to_timestamp('2020-09-08T12:00:00+00:00')) ---- 37 -query R +query I SELECT date_part('DAY', CAST('2000-01-01' AS DATE)) ---- 1 -query R +query I SELECT EXTRACT(day FROM to_timestamp('2020-09-08T12:00:00+00:00')) ---- 8 -query R +query I SELECT EXTRACT("day" FROM to_timestamp('2020-09-08T12:00:00+00:00')) ---- 8 -query R +query I SELECT EXTRACT('day' FROM to_timestamp('2020-09-08T12:00:00+00:00')) ---- 8 -query R +query I SELECT date_part('DOY', CAST('2000-01-01' AS DATE)) ---- 1 -query R +query I SELECT EXTRACT(doy FROM to_timestamp('2020-09-08T12:00:00+00:00')) ---- 252 -query R +query I SELECT EXTRACT("doy" FROM to_timestamp('2020-09-08T12:00:00+00:00')) ---- 252 -query R +query I SELECT EXTRACT('doy' FROM to_timestamp('2020-09-08T12:00:00+00:00')) ---- 252 -query R +query I SELECT date_part('DOW', CAST('2000-01-01' AS DATE)) ---- 6 -query R +query I SELECT EXTRACT(dow FROM to_timestamp('2020-09-08T12:00:00+00:00')) ---- 2 -query R +query I SELECT EXTRACT("dow" FROM to_timestamp('2020-09-08T12:00:00+00:00')) ---- 2 -query R +query I SELECT EXTRACT('dow' FROM to_timestamp('2020-09-08T12:00:00+00:00')) ---- 2 -query R +query I SELECT date_part('HOUR', CAST('2000-01-01' AS DATE)) ---- 0 -query R +query I SELECT EXTRACT(hour FROM to_timestamp('2020-09-08T12:03:03+00:00')) ---- 12 -query R +query I SELECT EXTRACT("hour" FROM to_timestamp('2020-09-08T12:03:03+00:00')) ---- 12 -query R +query I SELECT EXTRACT('hour' FROM to_timestamp('2020-09-08T12:03:03+00:00')) ---- 12 -query R +query I SELECT EXTRACT(minute FROM to_timestamp('2020-09-08T12:12:00+00:00')) ---- 12 -query R +query I SELECT EXTRACT("minute" FROM to_timestamp('2020-09-08T12:12:00+00:00')) ---- 12 -query R +query I SELECT EXTRACT('minute' FROM to_timestamp('2020-09-08T12:12:00+00:00')) ---- 12 -query R +query I SELECT date_part('minute', to_timestamp('2020-09-08T12:12:00+00:00')) ---- 12 -query R +# make sure the return type is integer +query T +SELECT arrow_typeof(date_part('minute', to_timestamp('2020-09-08T12:12:00+00:00'))) +---- +Int32 + +query I SELECT EXTRACT(second FROM timestamp '2020-09-08T12:00:12.12345678+00:00') ---- -12.12345678 +12 -query R +query I SELECT EXTRACT(millisecond FROM timestamp '2020-09-08T12:00:12.12345678+00:00') ---- -12123.45678 +12123 -query R +query I SELECT EXTRACT(microsecond FROM timestamp '2020-09-08T12:00:12.12345678+00:00') ---- -12123456.78 +12123456 -query R +query error DataFusion error: Internal error: unit Nanosecond not supported SELECT EXTRACT(nanosecond FROM timestamp '2020-09-08T12:00:12.12345678+00:00') ----- -12123456780 -query R +query I SELECT EXTRACT("second" FROM timestamp '2020-09-08T12:00:12.12345678+00:00') ---- -12.12345678 +12 -query R +query I SELECT EXTRACT("millisecond" FROM timestamp '2020-09-08T12:00:12.12345678+00:00') ---- -12123.45678 +12123 -query R +query I SELECT EXTRACT("microsecond" FROM timestamp '2020-09-08T12:00:12.12345678+00:00') ---- -12123456.78 +12123456 -query R +query error DataFusion error: Internal error: unit Nanosecond not supported SELECT EXTRACT("nanosecond" FROM timestamp '2020-09-08T12:00:12.12345678+00:00') ----- -12123456780 -query R +query I SELECT EXTRACT('second' FROM timestamp '2020-09-08T12:00:12.12345678+00:00') ---- -12.12345678 +12 -query R +query I SELECT EXTRACT('millisecond' FROM timestamp '2020-09-08T12:00:12.12345678+00:00') ---- -12123.45678 +12123 -query R +query I SELECT EXTRACT('microsecond' FROM timestamp '2020-09-08T12:00:12.12345678+00:00') ---- -12123456.78 +12123456 -query R +query error DataFusion error: Internal error: unit Nanosecond not supported SELECT EXTRACT('nanosecond' FROM timestamp '2020-09-08T12:00:12.12345678+00:00') ----- -12123456780 + # Keep precision when coercing Utf8 to Timestamp -query R +query I SELECT date_part('second', timestamp '2020-09-08T12:00:12.12345678+00:00') ---- -12.12345678 +12 -query R +query I SELECT date_part('millisecond', timestamp '2020-09-08T12:00:12.12345678+00:00') ---- -12123.45678 +12123 -query R +query I SELECT date_part('microsecond', timestamp '2020-09-08T12:00:12.12345678+00:00') ---- -12123456.78 +12123456 -query R +query error DataFusion error: Internal error: unit Nanosecond not supported SELECT date_part('nanosecond', timestamp '2020-09-08T12:00:12.12345678+00:00') ----- -12123456780 -query R + +query I SELECT date_part('second', '2020-09-08T12:00:12.12345678+00:00') ---- -12.12345678 +12 -query R +query I SELECT date_part('millisecond', '2020-09-08T12:00:12.12345678+00:00') ---- -12123.45678 +12123 -query R +query I SELECT date_part('microsecond', '2020-09-08T12:00:12.12345678+00:00') ---- -12123456.78 +12123456 -query R +query error DataFusion error: Internal error: unit Nanosecond not supported SELECT date_part('nanosecond', '2020-09-08T12:00:12.12345678+00:00') ----- -12123456780 # test_date_part_time ## time32 seconds -query R +query I SELECT date_part('hour', arrow_cast('23:32:50'::time, 'Time32(Second)')) ---- 23 -query R +query I SELECT extract(hour from arrow_cast('23:32:50'::time, 'Time32(Second)')) ---- 23 -query R +query I SELECT date_part('minute', arrow_cast('23:32:50'::time, 'Time32(Second)')) ---- 32 -query R +query I SELECT extract(minute from arrow_cast('23:32:50'::time, 'Time32(Second)')) ---- 32 -query R +query I SELECT date_part('second', arrow_cast('23:32:50'::time, 'Time32(Second)')) ---- 50 -query R +query I SELECT extract(second from arrow_cast('23:32:50'::time, 'Time32(Second)')) ---- 50 -query R +query I SELECT date_part('millisecond', arrow_cast('23:32:50'::time, 'Time32(Second)')) ---- 50000 -query R +query I SELECT extract(millisecond from arrow_cast('23:32:50'::time, 'Time32(Second)')) ---- 50000 -query R +query I SELECT date_part('microsecond', arrow_cast('23:32:50'::time, 'Time32(Second)')) ---- 50000000 -query R +query I SELECT extract(microsecond from arrow_cast('23:32:50'::time, 'Time32(Second)')) ---- 50000000 -query R -SELECT date_part('nanosecond', arrow_cast('23:32:50'::time, 'Time32(Second)')) ----- -50000000000 - -query R +query error DataFusion error: Internal error: unit Nanosecond not supported SELECT extract(nanosecond from arrow_cast('23:32:50'::time, 'Time32(Second)')) ----- -50000000000 query R SELECT date_part('epoch', arrow_cast('23:32:50'::time, 'Time32(Second)')) @@ -1195,65 +1186,58 @@ SELECT extract(epoch from arrow_cast('23:32:50'::time, 'Time32(Second)')) 84770 ## time32 milliseconds -query R +query I SELECT date_part('hour', arrow_cast('23:32:50.123'::time, 'Time32(Millisecond)')) ---- 23 -query R +query I SELECT extract(hour from arrow_cast('23:32:50.123'::time, 'Time32(Millisecond)')) ---- 23 -query R +query I SELECT date_part('minute', arrow_cast('23:32:50.123'::time, 'Time32(Millisecond)')) ---- 32 -query R +query I SELECT extract(minute from arrow_cast('23:32:50.123'::time, 'Time32(Millisecond)')) ---- 32 -query R +query I SELECT date_part('second', arrow_cast('23:32:50.123'::time, 'Time32(Millisecond)')) ---- -50.123 +50 -query R +query I SELECT extract(second from arrow_cast('23:32:50.123'::time, 'Time32(Millisecond)')) ---- -50.123 +50 -query R +query I SELECT date_part('millisecond', arrow_cast('23:32:50.123'::time, 'Time32(Millisecond)')) ---- 50123 -query R +query I SELECT extract(millisecond from arrow_cast('23:32:50.123'::time, 'Time32(Millisecond)')) ---- 50123 -query R +query I SELECT date_part('microsecond', arrow_cast('23:32:50.123'::time, 'Time32(Millisecond)')) ---- 50123000 -query R +query I SELECT extract(microsecond from arrow_cast('23:32:50.123'::time, 'Time32(Millisecond)')) ---- 50123000 -query R -SELECT date_part('nanosecond', arrow_cast('23:32:50.123'::time, 'Time32(Millisecond)')) ----- -50123000000 - -query R +query error DataFusion error: Internal error: unit Nanosecond not supported SELECT extract(nanosecond from arrow_cast('23:32:50.123'::time, 'Time32(Millisecond)')) ----- -50123000000 query R SELECT date_part('epoch', arrow_cast('23:32:50.123'::time, 'Time32(Millisecond)')) @@ -1266,65 +1250,58 @@ SELECT extract(epoch from arrow_cast('23:32:50.123'::time, 'Time32(Millisecond)' 84770.123 ## time64 microseconds -query R +query I SELECT date_part('hour', arrow_cast('23:32:50.123456'::time, 'Time64(Microsecond)')) ---- 23 -query R +query I SELECT extract(hour from arrow_cast('23:32:50.123456'::time, 'Time64(Microsecond)')) ---- 23 -query R +query I SELECT date_part('minute', arrow_cast('23:32:50.123456'::time, 'Time64(Microsecond)')) ---- 32 -query R +query I SELECT extract(minute from arrow_cast('23:32:50.123456'::time, 'Time64(Microsecond)')) ---- 32 -query R +query I SELECT date_part('second', arrow_cast('23:32:50.123456'::time, 'Time64(Microsecond)')) ---- -50.123456 +50 -query R +query I SELECT extract(second from arrow_cast('23:32:50.123456'::time, 'Time64(Microsecond)')) ---- -50.123456 +50 -query R +query I SELECT date_part('millisecond', arrow_cast('23:32:50.123456'::time, 'Time64(Microsecond)')) ---- -50123.456 +50123 -query R +query I SELECT extract(millisecond from arrow_cast('23:32:50.123456'::time, 'Time64(Microsecond)')) ---- -50123.456 +50123 -query R +query I SELECT date_part('microsecond', arrow_cast('23:32:50.123456'::time, 'Time64(Microsecond)')) ---- 50123456 -query R +query I SELECT extract(microsecond from arrow_cast('23:32:50.123456'::time, 'Time64(Microsecond)')) ---- 50123456 -query R -SELECT date_part('nanosecond', arrow_cast('23:32:50.123456'::time, 'Time64(Microsecond)')) ----- -50123456000 - -query R +query error DataFusion error: Internal error: unit Nanosecond not supported SELECT extract(nanosecond from arrow_cast('23:32:50.123456'::time, 'Time64(Microsecond)')) ----- -50123456000 query R SELECT date_part('epoch', arrow_cast('23:32:50.123456'::time, 'Time64(Microsecond)')) @@ -1337,81 +1314,74 @@ SELECT extract(epoch from arrow_cast('23:32:50.123456'::time, 'Time64(Microsecon 84770.123456 ## time64 nanoseconds -query R +query I SELECT date_part('hour', arrow_cast('23:32:50.123456789'::time, 'Time64(Nanosecond)')) ---- 23 -query R +query I SELECT extract(hour from arrow_cast('23:32:50.123456789'::time, 'Time64(Nanosecond)')) ---- 23 -query R +query I SELECT date_part('minute', arrow_cast('23:32:50.123456789'::time, 'Time64(Nanosecond)')) ---- 32 -query R +query I SELECT extract(minute from arrow_cast('23:32:50.123456789'::time, 'Time64(Nanosecond)')) ---- 32 -query R +query I SELECT date_part('second', arrow_cast('23:32:50.123456789'::time, 'Time64(Nanosecond)')) ---- -50.123456789 +50 -query R +query I select extract(second from '2024-08-09T12:13:14') ---- 14 -query R +query I select extract(seconds from '2024-08-09T12:13:14') ---- 14 -query R +query I SELECT extract(second from arrow_cast('23:32:50.123456789'::time, 'Time64(Nanosecond)')) ---- -50.123456789 +50 -query R +query I SELECT date_part('millisecond', arrow_cast('23:32:50.123456789'::time, 'Time64(Nanosecond)')) ---- -50123.456789 +50123 -query R +query I SELECT extract(millisecond from arrow_cast('23:32:50.123456789'::time, 'Time64(Nanosecond)')) ---- -50123.456789 +50123 # just some floating point stuff happening in the result here -query R +query I SELECT date_part('microsecond', arrow_cast('23:32:50.123456789'::time, 'Time64(Nanosecond)')) ---- -50123456.789000005 +50123456 -query R +query I SELECT extract(microsecond from arrow_cast('23:32:50.123456789'::time, 'Time64(Nanosecond)')) ---- -50123456.789000005 +50123456 -query R +query I SELECT extract(us from arrow_cast('23:32:50.123456789'::time, 'Time64(Nanosecond)')) ---- -50123456.789000005 +50123456 -query R +query error DataFusion error: Internal error: unit Nanosecond not supported SELECT date_part('nanosecond', arrow_cast('23:32:50.123456789'::time, 'Time64(Nanosecond)')) ----- -50123456789 - -query R -SELECT extract(nanosecond from arrow_cast('23:32:50.123456789'::time, 'Time64(Nanosecond)')) ----- -50123456789 query R SELECT date_part('epoch', arrow_cast('23:32:50.123456789'::time, 'Time64(Nanosecond)')) @@ -1487,32 +1457,32 @@ SELECT extract(epoch from arrow_cast('1969-12-31', 'Date64')) # test_extract_interval -query R +query I SELECT extract(year from arrow_cast('10 years', 'Interval(YearMonth)')) ---- 10 -query R +query I SELECT extract(month from arrow_cast('10 years', 'Interval(YearMonth)')) ---- 0 -query R +query I SELECT extract(year from arrow_cast('10 months', 'Interval(YearMonth)')) ---- 0 -query R +query I SELECT extract(month from arrow_cast('10 months', 'Interval(YearMonth)')) ---- 10 -query R +query I SELECT extract(year from arrow_cast('20 months', 'Interval(YearMonth)')) ---- 1 -query R +query I SELECT extract(month from arrow_cast('20 months', 'Interval(YearMonth)')) ---- 8 @@ -1523,47 +1493,47 @@ SELECT extract(year from arrow_cast('10 days', 'Interval(DayTime)')) query error DataFusion error: Arrow error: Compute error: Month does not support: Interval\(DayTime\) SELECT extract(month from arrow_cast('10 days', 'Interval(DayTime)')) -query R +query I SELECT extract(day from arrow_cast('10 days', 'Interval(DayTime)')) ---- 10 -query R +query I SELECT extract(day from arrow_cast('14400 minutes', 'Interval(DayTime)')) ---- 0 -query R +query I SELECT extract(minute from arrow_cast('14400 minutes', 'Interval(DayTime)')) ---- 14400 -query R +query I SELECT extract(second from arrow_cast('5.1 seconds', 'Interval(DayTime)')) ---- 5 -query R +query I SELECT extract(second from arrow_cast('14400 minutes', 'Interval(DayTime)')) ---- 864000 -query R +query I SELECT extract(second from arrow_cast('2 months', 'Interval(MonthDayNano)')) ---- 0 -query R +query I SELECT extract(second from arrow_cast('2 days', 'Interval(MonthDayNano)')) ---- 0 -query R +query I SELECT extract(second from arrow_cast('2 seconds', 'Interval(MonthDayNano)')) ---- 2 -query R +query I SELECT extract(seconds from arrow_cast('2 seconds', 'Interval(MonthDayNano)')) ---- 2 @@ -1573,17 +1543,17 @@ SELECT extract(epoch from arrow_cast('2 seconds', 'Interval(MonthDayNano)')) ---- 2 -query R +query I SELECT extract(milliseconds from arrow_cast('2 seconds', 'Interval(MonthDayNano)')) ---- 2000 -query R +query I SELECT extract(second from arrow_cast('2030 milliseconds', 'Interval(MonthDayNano)')) ---- -2.03 +2 -query R +query I SELECT extract(second from arrow_cast(NULL, 'Interval(MonthDayNano)')) ---- NULL @@ -1597,7 +1567,7 @@ create table t (id int, i interval) as values (4, interval '8 months'), (5, NULL); -query IRR rowsort +query III select id, extract(second from i), @@ -1605,9 +1575,9 @@ select from t order by id; ---- -0 0.00000001 5 +0 0 5 1 0 15 -2 0.002 0 +2 0 0 3 2 0 4 0 8 5 NULL NULL @@ -1617,12 +1587,12 @@ drop table t; # test_extract_duration -query R +query I SELECT extract(second from arrow_cast(2, 'Duration(Second)')) ---- 2 -query R +query I SELECT extract(seconds from arrow_cast(2, 'Duration(Second)')) ---- 2 @@ -1632,27 +1602,27 @@ SELECT extract(epoch from arrow_cast(2, 'Duration(Second)')) ---- 2 -query R +query I SELECT extract(millisecond from arrow_cast(2, 'Duration(Second)')) ---- 2000 -query R +query I SELECT extract(second from arrow_cast(2, 'Duration(Millisecond)')) ---- -0.002 +0 -query R +query I SELECT extract(second from arrow_cast(2002, 'Duration(Millisecond)')) ---- -2.002 +2 -query R +query I SELECT extract(millisecond from arrow_cast(2002, 'Duration(Millisecond)')) ---- 2002 -query R +query I SELECT extract(day from arrow_cast(864000, 'Duration(Second)')) ---- 10 @@ -1663,7 +1633,7 @@ SELECT extract(month from arrow_cast(864000, 'Duration(Second)')) query error DataFusion error: Arrow error: Compute error: Year does not support: Duration\(Second\) SELECT extract(year from arrow_cast(864000, 'Duration(Second)')) -query R +query I SELECT extract(day from arrow_cast(NULL, 'Duration(Second)')) ---- NULL @@ -1720,10 +1690,8 @@ SELECT (date_part('microsecond', now()) = EXTRACT(microsecond FROM now())) ---- true -query B +query error DataFusion error: Internal error: unit Nanosecond not supported SELECT (date_part('nanosecond', now()) = EXTRACT(nanosecond FROM now())) ----- -true query B SELECT 'a' IN ('a','b') @@ -2230,7 +2198,7 @@ SELECT digest('','blake3'); ---- af1349b9f5f9a1a6a0404dea36dcc9499bcb25c9adc112b7cc9a93cae41f3262 -# vverify utf8view +# vverify utf8view query ? SELECT sha224(arrow_cast('tom', 'Utf8View')); ---- diff --git a/datafusion/sqllogictest/test_files/group_by.slt b/datafusion/sqllogictest/test_files/group_by.slt index f74e1006f7f6..5d8c4dfd05b4 100644 --- a/datafusion/sqllogictest/test_files/group_by.slt +++ b/datafusion/sqllogictest/test_files/group_by.slt @@ -4294,7 +4294,7 @@ physical_plan 08)--------------RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1 09)----------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/timestamps.csv]]}, projection=[ts], output_ordering=[ts@0 DESC], has_header=false -query R +query I SELECT extract(month from ts) as months FROM csv_with_timestamps GROUP BY extract(month from ts) @@ -4344,7 +4344,7 @@ create table t1(state string, city string, min_temp float, area int, time timest ('MA', 'Boston', 70.4, 1, 50), ('MA', 'Bedford', 71.59, 2, 150); -query RI +query II select date_part('year', time) as bla, count(distinct state) as count from t1 group by bla; ---- 1970 1 diff --git a/datafusion/sqllogictest/test_files/timestamps.slt b/datafusion/sqllogictest/test_files/timestamps.slt index a80036df2ca8..b713008d2c3b 100644 --- a/datafusion/sqllogictest/test_files/timestamps.slt +++ b/datafusion/sqllogictest/test_files/timestamps.slt @@ -1756,13 +1756,13 @@ SELECT date_bin('1 day', TIMESTAMPTZ '2022-01-01 20:10:00Z', TIMESTAMP '2020-01- 2022-01-01T07:00:00+07:00 # postgresql: 1 -query R +query I SELECT date_part('hour', TIMESTAMPTZ '2000-01-01T01:01:01') as part ---- 1 # postgresql: 8 -query R +query I SELECT date_part('hour', TIMESTAMPTZ '2000-01-01T01:01:01Z') as part ---- 8 @@ -1839,13 +1839,13 @@ SELECT date_bin('2 hour', TIMESTAMPTZ '2022-01-01 01:10:00+07', '2020-01-01T00:0 2021-12-31T18:00:00Z # postgresql: 1 -query R +query I SELECT date_part('hour', TIMESTAMPTZ '2000-01-01T01:01:01') as part ---- 1 # postgresql: 18 -query R +query I SELECT date_part('hour', TIMESTAMPTZ '2000-01-01T01:01:01+07') as part ---- 18 diff --git a/datafusion/sqllogictest/test_files/tpch/q7.slt.part b/datafusion/sqllogictest/test_files/tpch/q7.slt.part index a16af4710478..92ce48c286be 100644 --- a/datafusion/sqllogictest/test_files/tpch/q7.slt.part +++ b/datafusion/sqllogictest/test_files/tpch/q7.slt.part @@ -141,7 +141,7 @@ physical_plan -query TTRR +query TTIR select supp_nation, cust_nation, diff --git a/datafusion/sqllogictest/test_files/tpch/q8.slt.part b/datafusion/sqllogictest/test_files/tpch/q8.slt.part index fd5773438466..225836a4b4d4 100644 --- a/datafusion/sqllogictest/test_files/tpch/q8.slt.part +++ b/datafusion/sqllogictest/test_files/tpch/q8.slt.part @@ -163,7 +163,7 @@ physical_plan -query RR +query IR select o_year, cast(cast(sum(case diff --git a/datafusion/sqllogictest/test_files/tpch/q9.slt.part b/datafusion/sqllogictest/test_files/tpch/q9.slt.part index c4910beb842b..8cde946db877 100644 --- a/datafusion/sqllogictest/test_files/tpch/q9.slt.part +++ b/datafusion/sqllogictest/test_files/tpch/q9.slt.part @@ -127,7 +127,7 @@ physical_plan -query TRR +query TIR select nation, o_year,