From 2e5fec417f603ca9e44f492d3349d8dfb877110d Mon Sep 17 00:00:00 2001 From: TCeason <33082201+TCeason@users.noreply.github.com> Date: Sat, 14 Dec 2024 20:11:04 +0800 Subject: [PATCH 1/2] feat(query): add interval type (#16990) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * support Interval DataType :) select to_interval('02:01'), to_interval('1 year 1 day 1 hour'); ┌───────────────────────────────────────────────────────────┐ │ to_interval('02:01') │ to_interval('1 year 1 day 1 hour') │ │ Interval │ Interval │ ├──────────────────────┼────────────────────────────────────┤ │ 2:01:00 │ 1year 1day 1:00:00 │ └───────────────────────────────────────────────────────────┘ * refactor: months_days_ns(pub i32,pub i32,pub i64) -> months_days_micros(pub i128) --- Cargo.lock | 3 + src/common/column/Cargo.toml | 2 +- src/common/column/src/types/mod.rs | 2 +- src/common/column/src/types/native.rs | 141 ++--- src/common/column/src/types/simd/mod.rs | 6 +- src/common/io/src/interval.rs | 564 ++++++++++++++++++ src/common/io/src/lib.rs | 2 + src/common/io/tests/it/interval.rs | 158 +++++ src/common/io/tests/it/main.rs | 1 + .../native/src/compression/integer/bp.rs | 7 +- .../src/compression/integer/delta_bp.rs | 7 +- .../native/src/compression/integer/freq.rs | 8 +- .../native/src/compression/integer/mod.rs | 9 +- .../native/src/compression/integer/traits.rs | 27 +- src/common/native/src/read/array/interval.rs | 128 ++++ src/common/native/src/read/array/mod.rs | 2 + src/common/native/src/read/batch_read.rs | 10 + src/common/native/src/read/deserialize.rs | 8 + src/common/native/src/write/serialize.rs | 5 +- .../src/schema_from_to_protobuf_impl.rs | 2 + src/meta/proto-conv/src/util.rs | 1 + src/meta/proto-conv/tests/it/main.rs | 1 + .../tests/it/v114_interval_datatype.rs | 225 +++++++ src/meta/protos/proto/datatype.proto | 1 + src/query/ast/src/ast/expr.rs | 4 + src/query/ast/src/parser/expr.rs | 2 + .../ast/tests/it/testdata/expr-error.txt | 2 +- .../ast/tests/it/testdata/stmt-error.txt | 14 +- .../expression/src/aggregate/payload_row.rs | 1 + .../expression/src/converts/arrow/from.rs | 10 + .../expression/src/converts/arrow/mod.rs | 1 + src/query/expression/src/converts/arrow/to.rs | 9 + .../expression/src/converts/datavalues/to.rs | 1 + .../expression/src/converts/meta/bincode.rs | 7 + .../src/converts/meta/index_scalar.rs | 4 + src/query/expression/src/kernels/concat.rs | 8 + .../src/kernels/group_by_hash/utils.rs | 1 + .../expression/src/kernels/take_chunks.rs | 19 + src/query/expression/src/property.rs | 4 + src/query/expression/src/row/fixed.rs | 10 + src/query/expression/src/row/row_converter.rs | 6 + src/query/expression/src/schema.rs | 3 + src/query/expression/src/type_check.rs | 1 + src/query/expression/src/types.rs | 5 + src/query/expression/src/types/interval.rs | 248 ++++++++ src/query/expression/src/types/variant.rs | 2 + src/query/expression/src/utils/display.rs | 10 + .../expression/src/utils/variant_transform.rs | 2 + src/query/expression/src/utils/visitor.rs | 6 + src/query/expression/src/values.rs | 71 +++ src/query/formats/Cargo.toml | 1 + .../formats/src/field_decoder/fast_values.rs | 24 + src/query/formats/src/field_decoder/nested.rs | 23 + .../src/field_decoder/separated_text.rs | 16 + src/query/formats/src/field_encoder/csv.rs | 6 +- src/query/formats/src/field_encoder/json.rs | 5 +- src/query/formats/src/field_encoder/values.rs | 15 + src/query/formats/src/lib.rs | 2 + src/query/formats/src/output_format/json.rs | 2 + 
src/query/functions/Cargo.toml | 1 + src/query/functions/src/scalars/interval.rs | 81 +++ src/query/functions/src/scalars/mod.rs | 2 + .../functions/tests/it/scalars/parser.rs | 1 + .../it/scalars/testdata/function_list.txt | 6 + .../sql/src/planner/semantic/type_check.rs | 1 + .../storages/common/stage/src/read/cast.rs | 4 + .../table_functions/clustering_information.rs | 3 + src/tests/sqlsmith/src/sql_gen/ddl.rs | 4 + .../functions/02_0079_function_interval.test | 24 + 69 files changed, 1869 insertions(+), 123 deletions(-) create mode 100644 src/common/io/src/interval.rs create mode 100644 src/common/io/tests/it/interval.rs create mode 100644 src/common/native/src/read/array/interval.rs create mode 100644 src/meta/proto-conv/tests/it/v114_interval_datatype.rs create mode 100644 src/query/expression/src/types/interval.rs create mode 100644 src/query/functions/src/scalars/interval.rs create mode 100644 tests/sqllogictests/suites/query/functions/02_0079_function_interval.test diff --git a/Cargo.lock b/Cargo.lock index fbd12ef66739..5982a33dc2fc 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3166,6 +3166,7 @@ dependencies = [ "arrow-buffer", "arrow-data", "arrow-schema", + "borsh", "bytemuck", "databend-common-base", "databend-common-exception", @@ -3336,6 +3337,7 @@ dependencies = [ "bstr", "chrono-tz 0.8.6", "databend-common-base", + "databend-common-column", "databend-common-exception", "databend-common-expression", "databend-common-io", @@ -3374,6 +3376,7 @@ dependencies = [ "ctor 0.2.8", "databend-common-ast", "databend-common-base", + "databend-common-column", "databend-common-exception", "databend-common-expression", "databend-common-hashtable", diff --git a/src/common/column/Cargo.toml b/src/common/column/Cargo.toml index 5c5ecf86d325..1589cce8c863 100644 --- a/src/common/column/Cargo.toml +++ b/src/common/column/Cargo.toml @@ -21,7 +21,7 @@ column-default = [ ] [dependencies] - +borsh = { workspace = true, features = ["derive"] } databend-common-base = { workspace = true } databend-common-exception = { workspace = true } diff --git a/src/common/column/src/types/mod.rs b/src/common/column/src/types/mod.rs index e31e249fb660..e10af5be2065 100644 --- a/src/common/column/src/types/mod.rs +++ b/src/common/column/src/types/mod.rs @@ -114,6 +114,6 @@ mod private { impl Sealed for OrderedFloat {} impl Sealed for OrderedFloat {} impl Sealed for super::days_ms {} - impl Sealed for super::months_days_ns {} + impl Sealed for super::months_days_micros {} impl Sealed for View {} } diff --git a/src/common/column/src/types/native.rs b/src/common/column/src/types/native.rs index 1ba84c4bb19e..0a4bff09127b 100644 --- a/src/common/column/src/types/native.rs +++ b/src/common/column/src/types/native.rs @@ -17,9 +17,13 @@ use std::convert::TryFrom; use std::ops::Neg; use std::panic::RefUnwindSafe; +use borsh::BorshDeserialize; +use borsh::BorshSerialize; use bytemuck::Pod; use bytemuck::Zeroable; use databend_common_base::base::OrderedFloat; +use serde_derive::Deserialize; +use serde_derive::Serialize; use super::PrimitiveType; @@ -243,124 +247,71 @@ impl NativeType for days_ms { } /// The in-memory representation of the MonthDayNano variant of the "Interval" logical type. 
-#[derive(Debug, Copy, Clone, Default, PartialEq, Eq, Hash, Zeroable, Pod)] +#[derive( + Debug, + Copy, + Clone, + Default, + PartialEq, + PartialOrd, + Ord, + Eq, + Hash, + Zeroable, + Pod, + Serialize, + Deserialize, + BorshSerialize, + BorshDeserialize, +)] #[allow(non_camel_case_types)] #[repr(C)] -pub struct months_days_ns(pub i32, pub i32, pub i64); +pub struct months_days_micros(pub i128); -impl months_days_ns { - /// A new [`months_days_ns`]. - #[inline] - pub fn new(months: i32, days: i32, nanoseconds: i64) -> Self { - Self(months, days, nanoseconds) +impl months_days_micros { + pub fn new(months: i32, days: i32, microseconds: i64) -> Self { + let months_bits = (months as i128) << 96; + let days_bits = (days as i128) << 64; + let micros_bits = microseconds as i128; + + Self(months_bits | days_bits | micros_bits) } - /// The number of months - #[inline] pub fn months(&self) -> i32 { - self.0 + // Decoding logic + ((self.0 >> 96) & 0xFFFFFFFF) as i32 } - /// The number of days - #[inline] pub fn days(&self) -> i32 { - self.1 + ((self.0 >> 64) & 0xFFFFFFFF) as i32 } - /// The number of nanoseconds - #[inline] - pub fn ns(&self) -> i64 { - self.2 + pub fn microseconds(&self) -> i64 { + (self.0 & 0xFFFFFFFFFFFFFFFF) as i64 } } -impl NativeType for months_days_ns { +impl NativeType for months_days_micros { const PRIMITIVE: PrimitiveType = PrimitiveType::MonthDayNano; type Bytes = [u8; 16]; #[inline] fn to_le_bytes(&self) -> Self::Bytes { - let months = self.months().to_le_bytes(); - let days = self.days().to_le_bytes(); - let ns = self.ns().to_le_bytes(); - let mut result = [0; 16]; - result[0] = months[0]; - result[1] = months[1]; - result[2] = months[2]; - result[3] = months[3]; - result[4] = days[0]; - result[5] = days[1]; - result[6] = days[2]; - result[7] = days[3]; - (0..8).for_each(|i| { - result[8 + i] = ns[i]; - }); - result + self.0.to_le_bytes() } #[inline] fn to_be_bytes(&self) -> Self::Bytes { - let months = self.months().to_be_bytes(); - let days = self.days().to_be_bytes(); - let ns = self.ns().to_be_bytes(); - let mut result = [0; 16]; - result[0] = months[0]; - result[1] = months[1]; - result[2] = months[2]; - result[3] = months[3]; - result[4] = days[0]; - result[5] = days[1]; - result[6] = days[2]; - result[7] = days[3]; - (0..8).for_each(|i| { - result[8 + i] = ns[i]; - }); - result + self.0.to_be_bytes() } #[inline] fn from_le_bytes(bytes: Self::Bytes) -> Self { - let mut months = [0; 4]; - months[0] = bytes[0]; - months[1] = bytes[1]; - months[2] = bytes[2]; - months[3] = bytes[3]; - let mut days = [0; 4]; - days[0] = bytes[4]; - days[1] = bytes[5]; - days[2] = bytes[6]; - days[3] = bytes[7]; - let mut ns = [0; 8]; - (0..8).for_each(|i| { - ns[i] = bytes[8 + i]; - }); - Self( - i32::from_le_bytes(months), - i32::from_le_bytes(days), - i64::from_le_bytes(ns), - ) + Self(i128::from_le_bytes(bytes)) } #[inline] fn from_be_bytes(bytes: Self::Bytes) -> Self { - let mut months = [0; 4]; - months[0] = bytes[0]; - months[1] = bytes[1]; - months[2] = bytes[2]; - months[3] = bytes[3]; - let mut days = [0; 4]; - days[0] = bytes[4]; - days[1] = bytes[5]; - days[2] = bytes[6]; - days[3] = bytes[7]; - let mut ns = [0; 8]; - (0..8).for_each(|i| { - ns[i] = bytes[8 + i]; - }); - Self( - i32::from_be_bytes(months), - i32::from_be_bytes(days), - i64::from_be_bytes(ns), - ) + Self(i128::from_be_bytes(bytes)) } } @@ -370,9 +321,15 @@ impl std::fmt::Display for days_ms { } } -impl std::fmt::Display for months_days_ns { +impl std::fmt::Display for months_days_micros { fn fmt(&self, f: 
&mut std::fmt::Formatter) -> std::fmt::Result { - write!(f, "{}m {}d {}ns", self.months(), self.days(), self.ns()) + write!( + f, + "{}m {}d {}micros", + self.months(), + self.days(), + self.microseconds() + ) } } @@ -385,12 +342,12 @@ impl Neg for days_ms { } } -impl Neg for months_days_ns { +impl Neg for months_days_micros { type Output = Self; #[inline(always)] fn neg(self) -> Self::Output { - Self::new(-self.months(), -self.days(), -self.ns()) + Self::new(-self.months(), -self.days(), -self.microseconds()) } } diff --git a/src/common/column/src/types/simd/mod.rs b/src/common/column/src/types/simd/mod.rs index 206c6a577f7f..8fbe6e611609 100644 --- a/src/common/column/src/types/simd/mod.rs +++ b/src/common/column/src/types/simd/mod.rs @@ -19,7 +19,7 @@ use super::days_ms; use super::f16; use super::i256; -use super::months_days_ns; +use super::months_days_micros; use super::BitChunk; use super::BitChunkIter; use super::NativeType; @@ -151,7 +151,7 @@ pub(super) use native_simd; // of how they are represented in the different channels. native_simd!(f16x32, f16, 32, u32); native_simd!(days_msx8, days_ms, 8, u8); -native_simd!(months_days_nsx8, months_days_ns, 8, u8); +native_simd!(months_days_microsx8, months_days_micros, 8, u8); native_simd!(i128x8, i128, 8, u8); native_simd!(i256x8, i256, 8, u8); @@ -185,4 +185,4 @@ native!(f64, f64x8); native!(i128, i128x8); native!(i256, i256x8); native!(days_ms, days_msx8); -native!(months_days_ns, months_days_nsx8); +native!(months_days_micros, months_days_microsx8); diff --git a/src/common/io/src/interval.rs b/src/common/io/src/interval.rs new file mode 100644 index 000000000000..a52df21725f6 --- /dev/null +++ b/src/common/io/src/interval.rs @@ -0,0 +1,564 @@ +// Copyright 2021 Datafuse Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +use std::fmt::Display; +use std::fmt::Formatter; + +use databend_common_exception::ErrorCode; +use databend_common_exception::Result; + +pub trait BufferReadIntervalExt { + fn read_interval_text(&mut self) -> Result; +} + +#[derive(Debug, Copy, Clone, PartialEq, Default)] +pub struct Interval { + pub months: i32, + pub days: i32, + pub micros: i64, +} + +impl Display for Interval { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + let mut buffer = [0u8; 70]; + let len = IntervalToStringCast::format(*self, &mut buffer); + write!(f, "{}", String::from_utf8_lossy(&buffer[..len])) + } +} + +struct IntervalToStringCast; + +impl IntervalToStringCast { + fn format_signed_number(value: i64, buffer: &mut [u8], length: &mut usize) { + let s = value.to_string(); + let bytes = s.as_bytes(); + buffer[*length..*length + bytes.len()].copy_from_slice(bytes); + *length += bytes.len(); + } + + fn format_two_digits(value: i64, buffer: &mut [u8], length: &mut usize) { + let s = format!("{:02}", value.abs()); + let bytes = s.as_bytes(); + buffer[*length..*length + bytes.len()].copy_from_slice(bytes); + *length += bytes.len(); + } + + fn format_interval_value(value: i32, buffer: &mut [u8], length: &mut usize, name: &str) { + if value == 0 { + return; + } + if *length != 0 { + buffer[*length] = b' '; + *length += 1; + } + Self::format_signed_number(value as i64, buffer, length); + let name_bytes = name.as_bytes(); + buffer[*length..*length + name_bytes.len()].copy_from_slice(name_bytes); + *length += name_bytes.len(); + if value != 1 && value != -1 { + buffer[*length] = b's'; + *length += 1; + } + } + + fn format_micros(mut micros: i64, buffer: &mut [u8], length: &mut usize) { + if micros < 0 { + micros = -micros; + } + let s = format!("{:06}", micros); + let bytes = s.as_bytes(); + buffer[*length..*length + bytes.len()].copy_from_slice(bytes); + *length += bytes.len(); + + while *length > 0 && buffer[*length - 1] == b'0' { + *length -= 1; + } + } + + pub fn format(interval: Interval, buffer: &mut [u8]) -> usize { + let mut length = 0; + if interval.months != 0 { + let years = interval.months / 12; + let months = interval.months - years * 12; + Self::format_interval_value(years, buffer, &mut length, " year"); + Self::format_interval_value(months, buffer, &mut length, " month"); + } + if interval.days != 0 { + Self::format_interval_value(interval.days, buffer, &mut length, " day"); + } + if interval.micros != 0 { + if length != 0 { + buffer[length] = b' '; + length += 1; + } + let mut micros = interval.micros; + if micros < 0 { + buffer[length] = b'-'; + length += 1; + micros = -micros; + } + let hour = micros / MICROS_PER_HOUR; + micros -= hour * MICROS_PER_HOUR; + let min = micros / MICROS_PER_MINUTE; + micros -= min * MICROS_PER_MINUTE; + let sec = micros / MICROS_PER_SEC; + micros -= sec * MICROS_PER_SEC; + + Self::format_signed_number(hour, buffer, &mut length); + buffer[length] = b':'; + length += 1; + Self::format_two_digits(min, buffer, &mut length); + buffer[length] = b':'; + length += 1; + Self::format_two_digits(sec, buffer, &mut length); + if micros != 0 { + buffer[length] = b'.'; + length += 1; + Self::format_micros(micros, buffer, &mut length); + } + } else if length == 0 { + buffer[..8].copy_from_slice(b"00:00:00"); + return 8; + } + length + } +} + +impl Interval { + pub fn from_string(str: &str) -> Result { + Self::from_cstring(str.as_bytes()) + } + pub fn from_cstring(str: &[u8]) -> Result { + let mut result = Interval::default(); + let mut pos = 0; + let len = str.len(); + let 
mut found_any = false; + + if len == 0 { + return Err(ErrorCode::BadArguments("Empty string".to_string())); + } + match str[pos] { + b'@' => { + pos += 1; + } + b'P' | b'p' => { + return Err(ErrorCode::BadArguments( + "Posix intervals not supported yet".to_string(), + )); + } + _ => {} + } + + while pos < len { + match str[pos] { + b' ' | b'\t' | b'\n' => { + pos += 1; + continue; + } + b'0'..=b'9' => { + let (number, fraction, next_pos) = parse_number(&str[pos..])?; + pos += next_pos; + let (specifier, next_pos) = parse_identifier(&str[pos..]); + + pos += next_pos; + let _ = apply_specifier(&mut result, number, fraction, &specifier); + found_any = true; + } + b'-' => { + pos += 1; + let (number, fraction, next_pos) = parse_number(&str[pos..])?; + let number = -number; + let fraction = -fraction; + + pos += next_pos; + + let (specifier, next_pos) = parse_identifier(&str[pos..]); + + pos += next_pos; + let _ = apply_specifier(&mut result, number, fraction, &specifier); + found_any = true; + } + b'a' | b'A' => { + if len - pos < 3 + || str[pos + 1] != b'g' && str[pos + 1] != b'G' + || str[pos + 2] != b'o' && str[pos + 2] != b'O' + { + return Err(ErrorCode::BadArguments( + "Invalid 'ago' specifier".to_string(), + )); + } + pos += 3; + while pos < len { + match str[pos] { + b' ' | b'\t' | b'\n' => { + pos += 1; + } + _ => { + return Err(ErrorCode::BadArguments( + "Trailing characters after 'ago'".to_string(), + )); + } + } + } + result.months = -result.months; + result.days = -result.days; + result.micros = -result.micros; + return Ok(result); + } + _ => { + return Err(ErrorCode::BadArguments(format!( + "Unexpected character at position {}", + pos + ))); + } + } + } + + if !found_any { + return Err(ErrorCode::BadArguments( + "No interval specifiers found".to_string(), + )); + } + Ok(result) + } +} + +fn parse_number(bytes: &[u8]) -> Result<(i64, i64, usize)> { + let mut number: i64 = 0; + let mut fraction: i64 = 0; + let mut pos = 0; + + while pos < bytes.len() && bytes[pos].is_ascii_digit() { + number = number + .checked_mul(10) + .ok_or(ErrorCode::BadArguments("Number too large"))? + + (bytes[pos] - b'0') as i64; + pos += 1; + } + + if pos < bytes.len() && bytes[pos] == b'.' 
{ + pos += 1; + let mut mult: i64 = 100000; + while pos < bytes.len() && bytes[pos].is_ascii_digit() { + if mult > 0 { + fraction += (bytes[pos] - b'0') as i64 * mult; + } + mult /= 10; + pos += 1; + } + } + if pos < bytes.len() && bytes[pos] == b':' { + let time_bytes = &bytes[pos..]; + let mut time_pos = 0; + let mut total_micros: i64 = number * 60 * 60 * MICROS_PER_SEC; + let mut colon_count = 0; + + while colon_count < 2 && time_bytes.len() > time_pos { + let (minute, _, next_pos) = parse_time_part(&time_bytes[time_pos..])?; + let minute_nanos = minute * 60 * MICROS_PER_SEC; + total_micros += minute_nanos; + time_pos += next_pos; + + if time_bytes.len() > time_pos && time_bytes[time_pos] == b':' { + time_pos += 1; + colon_count += 1; + } else { + break; + } + } + if time_bytes.len() > time_pos { + let (seconds, micros, next_pos) = parse_time_part_with_micros(&time_bytes[time_pos..])?; + total_micros += seconds * MICROS_PER_SEC + micros; + time_pos += next_pos; + } + return Ok((total_micros, 0, pos + time_pos)); + } + + if pos == 0 { + return Err(ErrorCode::BadArguments("Expected number".to_string())); + } + + Ok((number, fraction, pos)) +} + +fn parse_time_part(bytes: &[u8]) -> Result<(i64, i64, usize)> { + let mut number: i64 = 0; + let mut pos = 0; + while pos < bytes.len() && bytes[pos].is_ascii_digit() { + number = number + .checked_mul(10) + .ok_or(ErrorCode::BadArguments("Number too large"))? + + (bytes[pos] - b'0') as i64; + pos += 1; + } + Ok((number, 0, pos)) +} + +fn parse_time_part_with_micros(bytes: &[u8]) -> Result<(i64, i64, usize)> { + let mut number: i64 = 0; + let mut fraction: i64 = 0; + let mut pos = 0; + + while pos < bytes.len() && bytes[pos].is_ascii_digit() { + number = number + .checked_mul(10) + .ok_or(ErrorCode::BadArguments("Number too large"))? + + (bytes[pos] - b'0') as i64; + pos += 1; + } + + if pos < bytes.len() && bytes[pos] == b'.' 
{ + pos += 1; + let mut mult: i64 = 100000; + while pos < bytes.len() && bytes[pos].is_ascii_digit() { + if mult > 0 { + fraction += (bytes[pos] - b'0') as i64 * mult; + } + mult /= 10; + pos += 1; + } + } + + Ok((number, fraction, pos)) +} + +fn parse_identifier(s: &[u8]) -> (String, usize) { + let mut pos = 0; + while pos < s.len() && (s[pos] == b' ' || s[pos] == b'\t' || s[pos] == b'\n') { + pos += 1; + } + let start_pos = pos; + while pos < s.len() && (s[pos].is_ascii_alphabetic()) { + pos += 1; + } + + if pos == start_pos { + return ("".to_string(), pos); + } + + let identifier = String::from_utf8_lossy(&s[start_pos..pos]).to_string(); + (identifier, pos) +} + +#[derive(Debug, PartialEq, Eq)] +enum DatePartSpecifier { + Millennium, + Century, + Decade, + Year, + Quarter, + Month, + Day, + Week, + Microseconds, + Milliseconds, + Second, + Minute, + Hour, +} + +fn try_get_date_part_specifier(specifier_str: &str) -> Result { + match specifier_str.to_lowercase().as_str() { + "millennium" | "millennia" => Ok(DatePartSpecifier::Millennium), + "century" | "centuries" => Ok(DatePartSpecifier::Century), + "decade" | "decades" => Ok(DatePartSpecifier::Decade), + "year" | "years" | "y" => Ok(DatePartSpecifier::Year), + "quarter" | "quarters" => Ok(DatePartSpecifier::Quarter), + "month" | "months" | "mon" => Ok(DatePartSpecifier::Month), + "day" | "days" | "d" => Ok(DatePartSpecifier::Day), + "week" | "weeks" | "w" => Ok(DatePartSpecifier::Week), + "microsecond" | "microseconds" | "us" => Ok(DatePartSpecifier::Microseconds), + "millisecond" | "milliseconds" | "ms" => Ok(DatePartSpecifier::Milliseconds), + "second" | "seconds" | "s" => Ok(DatePartSpecifier::Second), + "minute" | "minutes" | "m" => Ok(DatePartSpecifier::Minute), + "hour" | "hours" | "h" => Ok(DatePartSpecifier::Hour), + _ => Err(ErrorCode::BadArguments(format!( + "Invalid date part specifier: {}", + specifier_str + ))), + } +} + +const MICROS_PER_SEC: i64 = 1_000_000; +const MICROS_PER_MSEC: i64 = 1_000; +const MICROS_PER_MINUTE: i64 = 60 * MICROS_PER_SEC; +const MICROS_PER_HOUR: i64 = 60 * MICROS_PER_MINUTE; +const DAYS_PER_WEEK: i32 = 7; +const MONTHS_PER_QUARTER: i32 = 3; +const MONTHS_PER_YEAR: i32 = 12; +const MONTHS_PER_DECADE: i32 = 120; +const MONTHS_PER_CENTURY: i32 = 1200; +const MONTHS_PER_MILLENNIUM: i32 = 12000; + +fn apply_specifier( + result: &mut Interval, + number: i64, + fraction: i64, + specifier_str: &str, +) -> Result<()> { + if specifier_str.is_empty() { + result.micros = result + .micros + .checked_add(number) + .ok_or(ErrorCode::BadArguments("Overflow".to_string()))?; + result.micros = result + .micros + .checked_add(fraction) + .ok_or(ErrorCode::BadArguments("Overflow".to_string()))?; + return Ok(()); + } + + let specifier = try_get_date_part_specifier(specifier_str)?; + match specifier { + DatePartSpecifier::Millennium => { + result.months = result + .months + .checked_add( + number + .checked_mul(MONTHS_PER_MILLENNIUM as i64) + .ok_or(ErrorCode::BadArguments("Overflow".to_string()))? + .try_into() + .map_err(|_| ErrorCode::BadArguments("Overflow".to_string()))?, + ) + .ok_or(ErrorCode::BadArguments("Overflow".to_string()))?; + } + DatePartSpecifier::Century => { + result.months = result + .months + .checked_add( + number + .checked_mul(MONTHS_PER_CENTURY as i64) + .ok_or(ErrorCode::BadArguments("Overflow".to_string()))? 
+ .try_into() + .map_err(|_| ErrorCode::BadArguments("Overflow".to_string()))?, + ) + .ok_or(ErrorCode::BadArguments("Overflow".to_string()))?; + } + DatePartSpecifier::Decade => { + result.months = result + .months + .checked_add( + number + .checked_mul(MONTHS_PER_DECADE as i64) + .ok_or(ErrorCode::BadArguments("Overflow".to_string()))? + .try_into() + .map_err(|_| ErrorCode::BadArguments("Overflow".to_string()))?, + ) + .ok_or(ErrorCode::BadArguments("Overflow".to_string()))?; + } + DatePartSpecifier::Year => { + result.months = result + .months + .checked_add( + number + .checked_mul(MONTHS_PER_YEAR as i64) + .ok_or(ErrorCode::BadArguments("Overflow".to_string()))? + .try_into() + .map_err(|_| ErrorCode::BadArguments("Overflow".to_string()))?, + ) + .ok_or(ErrorCode::BadArguments("Overflow".to_string()))?; + } + DatePartSpecifier::Quarter => { + result.months = result + .months + .checked_add( + number + .checked_mul(MONTHS_PER_QUARTER as i64) + .ok_or(ErrorCode::BadArguments("Overflow".to_string()))? + .try_into() + .map_err(|_| ErrorCode::BadArguments("Overflow".to_string()))?, + ) + .ok_or(ErrorCode::BadArguments("Overflow".to_string()))?; + } + DatePartSpecifier::Month => { + result.months = result + .months + .checked_add( + number + .try_into() + .map_err(|_| ErrorCode::BadArguments("Overflow".to_string()))?, + ) + .ok_or(ErrorCode::BadArguments("Overflow".to_string()))?; + } + DatePartSpecifier::Day => { + result.days = result + .days + .checked_add( + number + .try_into() + .map_err(|_| ErrorCode::BadArguments("Overflow".to_string()))?, + ) + .ok_or(ErrorCode::BadArguments("Overflow".to_string()))?; + } + DatePartSpecifier::Week => { + result.days = result + .days + .checked_add( + number + .checked_mul(DAYS_PER_WEEK as i64) + .ok_or(ErrorCode::BadArguments("Overflow".to_string()))? 
+ .try_into() + .map_err(|_| ErrorCode::BadArguments("Overflow".to_string()))?, + ) + .ok_or(ErrorCode::BadArguments("Overflow".to_string()))?; + } + DatePartSpecifier::Microseconds => { + result.micros = result + .micros + .checked_add(number) + .ok_or(ErrorCode::BadArguments("Overflow".to_string()))?; + } + DatePartSpecifier::Milliseconds => { + result.micros = result + .micros + .checked_add( + number + .checked_mul(MICROS_PER_MSEC) + .ok_or(ErrorCode::BadArguments("Overflow".to_string()))?, + ) + .ok_or(ErrorCode::BadArguments("Overflow".to_string()))?; + } + DatePartSpecifier::Second => { + result.micros = result + .micros + .checked_add( + number + .checked_mul(MICROS_PER_SEC) + .ok_or(ErrorCode::BadArguments("Overflow".to_string()))?, + ) + .ok_or(ErrorCode::BadArguments("Overflow".to_string()))?; + } + DatePartSpecifier::Minute => { + result.micros = result + .micros + .checked_add( + number + .checked_mul(MICROS_PER_MINUTE) + .ok_or(ErrorCode::BadArguments("Overflow".to_string()))?, + ) + .ok_or(ErrorCode::BadArguments("Overflow".to_string()))?; + } + DatePartSpecifier::Hour => { + result.micros = result + .micros + .checked_add( + number + .checked_mul(MICROS_PER_HOUR) + .ok_or(ErrorCode::BadArguments("Overflow".to_string()))?, + ) + .ok_or(ErrorCode::BadArguments("Overflow".to_string()))?; + } + } + Ok(()) +} diff --git a/src/common/io/src/lib.rs b/src/common/io/src/lib.rs index d7964a2734d7..a067ded381d7 100644 --- a/src/common/io/src/lib.rs +++ b/src/common/io/src/lib.rs @@ -43,6 +43,7 @@ pub mod geometry; mod position; mod stat_buffer; +pub mod interval; pub mod wkb; pub use bitmap::deserialize_bitmap; @@ -65,3 +66,4 @@ pub use geometry::read_srid; pub use geometry::Axis; pub use geometry::Extremum; pub use geometry::GeometryDataType; +pub use interval::Interval; diff --git a/src/common/io/tests/it/interval.rs b/src/common/io/tests/it/interval.rs new file mode 100644 index 000000000000..2036fbfb6785 --- /dev/null +++ b/src/common/io/tests/it/interval.rs @@ -0,0 +1,158 @@ +// Copyright 2021 Datafuse Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +use databend_common_io::Interval; + +#[test] +fn test_interval_from_string() { + let tests = vec![ + ("1 year 2 months 3 days", Interval { + months: 14, + days: 3, + micros: 0, + }), + ("-1 year -2 months -3 days", Interval { + months: -14, + days: -3, + micros: 0, + }), + ("1 year 2 months 3 days ago", Interval { + months: -14, + days: -3, + micros: 0, + }), + ("1day", Interval { + months: 0, + days: 1, + micros: 0, + }), + ("1 hour", Interval { + months: 0, + days: 0, + micros: 3600000000, + }), + ("1 hours 1 second", Interval { + months: 0, + days: 0, + micros: 3601000000, + }), + ("1 day 01:23:45", Interval { + months: 0, + days: 1, + micros: 5_025_000_000, + }), + ("2 hours 30 minutes", Interval { + months: 0, + days: 0, + micros: 9_000_000_000, + }), + ("-1 day 01:23:45", Interval { + months: 0, + days: -1, + micros: 5025000000, + }), + ("-1 day -01:23:45", Interval { + months: 0, + days: -1, + micros: -5025000000, + }), + ]; + + for (input, expected) in tests { + let interval = Interval::from_string(input).unwrap(); + assert_eq!(interval, expected); + } +} + +#[test] +fn test_string_to_interval() { + let tests = vec![ + ( + Interval { + months: 14, + days: 3, + micros: 0, + }, + "1 year 2 months 3 days", + ), + ( + Interval { + months: -14, + days: -3, + micros: 0, + }, + "-1 year -2 months -3 days", + ), + ( + Interval { + months: 0, + days: 1, + micros: 0, + }, + "1 day", + ), + ( + Interval { + months: 0, + days: 0, + micros: 3600000000, + }, + "1:00:00", + ), + ( + Interval { + months: 0, + days: 0, + micros: 3601000000, + }, + "1:00:01", + ), + ( + Interval { + months: 0, + days: 1, + micros: 5025000000, + }, + "1 day 1:23:45", + ), + ( + Interval { + months: 0, + days: 0, + micros: 9000000000, + }, + "2:30:00", + ), + ( + Interval { + months: 0, + days: -1, + micros: 5025000000, + }, + "-1 day 1:23:45", + ), + ( + Interval { + months: 0, + days: -1, + micros: -5025000000, + }, + "-1 day -1:23:45", + ), + ]; + + for (interval, expected) in tests { + assert_eq!(interval.to_string(), expected); + } +} diff --git a/src/common/io/tests/it/main.rs b/src/common/io/tests/it/main.rs index 354ca2e328fd..9ac5464bb294 100644 --- a/src/common/io/tests/it/main.rs +++ b/src/common/io/tests/it/main.rs @@ -23,4 +23,5 @@ mod bincode_serialization; mod borsh_serialization; mod cursor_ext; mod escape; +mod interval; mod serialization_format_compatability; diff --git a/src/common/native/src/compression/integer/bp.rs b/src/common/native/src/compression/integer/bp.rs index 7deabf19fe66..f8bc9ced4f62 100644 --- a/src/common/native/src/compression/integer/bp.rs +++ b/src/common/native/src/compression/integer/bp.rs @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
+use std::cmp::Ordering; use std::io::BufRead; use bitpacking::BitPacker; @@ -89,8 +90,10 @@ impl IntegerCompression for Bitpacking { } fn compress_ratio(&self, stats: &IntegerStats) -> f64 { - if stats.min.as_i64() < 0 - || std::mem::size_of::() != 4 + if match stats.min.compare_i64(0) { + Ordering::Greater | Ordering::Equal => false, + Ordering::Less => true, + } || std::mem::size_of::() != 4 || stats.src.len() % BitPacker4x::BLOCK_LEN != 0 { return 0.0f64; diff --git a/src/common/native/src/compression/integer/delta_bp.rs b/src/common/native/src/compression/integer/delta_bp.rs index bc6bde4bcaff..c6f15e874141 100644 --- a/src/common/native/src/compression/integer/delta_bp.rs +++ b/src/common/native/src/compression/integer/delta_bp.rs @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +use std::cmp::Ordering; use std::io::BufRead; use bitpacking::BitPacker; @@ -94,8 +95,10 @@ impl IntegerCompression for DeltaBitpacking { } fn compress_ratio(&self, stats: &IntegerStats) -> f64 { - if stats.min.as_i64() < 0 - || std::mem::size_of::() != 4 + if match stats.min.compare_i64(0) { + Ordering::Greater | Ordering::Equal => false, + Ordering::Less => true, + } || std::mem::size_of::() != 4 || stats.src.len() % BitPacker4x::BLOCK_LEN != 0 || !stats.is_sorted || stats.null_count > 0 diff --git a/src/common/native/src/compression/integer/freq.rs b/src/common/native/src/compression/integer/freq.rs index 7ff97d20dcfb..bcdf21594b24 100644 --- a/src/common/native/src/compression/integer/freq.rs +++ b/src/common/native/src/compression/integer/freq.rs @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +use std::cmp::Ordering; use std::io::BufRead; use std::io::Read; @@ -145,7 +146,12 @@ impl IntegerCompression for Freq { } } - if max_count as f64 / stats.tuple_count as f64 >= 0.9 && stats.max.as_i64() >= (1 << 8) { + if max_count as f64 / stats.tuple_count as f64 >= 0.9 + && match stats.max.compare_i64(1 << 8) { + Ordering::Greater | Ordering::Equal => true, + Ordering::Less => false, + } + { return (stats.tuple_count - 1) as f64; } diff --git a/src/common/native/src/compression/integer/mod.rs b/src/common/native/src/compression/integer/mod.rs index a135ff13a873..839ef18cfabc 100644 --- a/src/common/native/src/compression/integer/mod.rs +++ b/src/common/native/src/compression/integer/mod.rs @@ -70,7 +70,14 @@ pub fn compress_integer( let input_buf = bytemuck::cast_slice(col.as_slice()); c.compress(input_buf, buf) } - IntCompressor::Extend(c) => c.compress(col, &stats, &write_options, buf), + IntCompressor::Extend(c) => { + if T::USE_COMMON_COMPRESSION { + return Err(Error::NotYetImplemented( + "Not support Extend compressor".to_string(), + )); + } + c.compress(col, &stats, &write_options, buf) + } }?; buf[pos..pos + 4].copy_from_slice(&(compressed_size as u32).to_le_bytes()); buf[pos + 4..pos + 8] diff --git a/src/common/native/src/compression/integer/traits.rs b/src/common/native/src/compression/integer/traits.rs index 8a9fc4e9f7ea..4b63c464f87d 100644 --- a/src/common/native/src/compression/integer/traits.rs +++ b/src/common/native/src/compression/integer/traits.rs @@ -12,21 +12,25 @@ // See the License for the specific language governing permissions and // limitations under the License. 
+use std::cmp::Ordering; use std::hash::Hash; use databend_common_column::types::i256; +use databend_common_column::types::months_days_micros; use databend_common_column::types::NativeType; pub trait IntegerType: NativeType + PartialOrd + Hash + Eq { - fn as_i64(&self) -> i64; + fn compare_i64(&self, i: i64) -> Ordering; + const USE_COMMON_COMPRESSION: bool; } macro_rules! integer_type { ($type:ty) => { impl IntegerType for $type { - fn as_i64(&self) -> i64 { - *self as i64 + fn compare_i64(&self, i: i64) -> Ordering { + (*self as i64).cmp(&i) } + const USE_COMMON_COMPRESSION: bool = false; } }; } @@ -39,14 +43,19 @@ integer_type!(i8); integer_type!(i16); integer_type!(i32); integer_type!(i64); +integer_type!(i128); -impl IntegerType for i128 { - fn as_i64(&self) -> i64 { - *self as i64 +impl IntegerType for i256 { + fn compare_i64(&self, i: i64) -> Ordering { + self.0.as_i64().cmp(&i) } + const USE_COMMON_COMPRESSION: bool = false; } -impl IntegerType for i256 { - fn as_i64(&self) -> i64 { - self.0.as_i64() + +// pub struct months_days_micros(pub i128); +impl IntegerType for months_days_micros { + fn compare_i64(&self, i: i64) -> Ordering { + (self.0 as i64).cmp(&i) } + const USE_COMMON_COMPRESSION: bool = true; } diff --git a/src/common/native/src/read/array/interval.rs b/src/common/native/src/read/array/interval.rs new file mode 100644 index 000000000000..efa01b25b0a7 --- /dev/null +++ b/src/common/native/src/read/array/interval.rs @@ -0,0 +1,128 @@ +// Copyright 2021 Datafuse Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +use std::io::Cursor; + +use databend_common_column::buffer::Buffer; +use databend_common_column::types::months_days_micros; +use databend_common_expression::types::IntervalType; +use databend_common_expression::types::ValueType; +use databend_common_expression::Column; +use databend_common_expression::TableDataType; + +use crate::compression::integer::decompress_integer; +use crate::error::Result; +use crate::nested::InitNested; +use crate::nested::NestedState; +use crate::read::read_basic::*; +use crate::read::BufReader; +use crate::read::NativeReadBuf; +use crate::read::PageIterator; +use crate::PageMeta; + +#[derive(Debug)] +pub struct IntervalNestedIter +where I: Iterator)>> + PageIterator + Send + Sync +{ + iter: I, + data_type: TableDataType, + init: Vec, + scratch: Vec, +} + +impl IntervalNestedIter +where I: Iterator)>> + PageIterator + Send + Sync +{ + pub fn new(iter: I, data_type: TableDataType, init: Vec) -> Self { + Self { + iter, + data_type, + init, + scratch: vec![], + } + } +} + +impl IntervalNestedIter +where I: Iterator)>> + PageIterator + Send + Sync +{ + fn deserialize(&mut self, num_values: u64, buffer: Vec) -> Result<(NestedState, Column)> { + let mut reader = BufReader::with_capacity(buffer.len(), Cursor::new(buffer)); + let (nested, validity) = read_nested(&mut reader, &self.init, num_values as usize)?; + let length = num_values as usize; + + let mut values = Vec::with_capacity(length); + decompress_integer(&mut reader, length, &mut values, &mut self.scratch)?; + assert_eq!(values.len(), length); + + let mut buffer = reader.into_inner().into_inner(); + self.iter.swap_buffer(&mut buffer); + + let column: Buffer = values.into(); + let column: Buffer = unsafe { std::mem::transmute(column) }; + let mut col = IntervalType::upcast_column(column); + if self.data_type.is_nullable() { + col = col.wrap_nullable(validity); + } + Ok((nested, col)) + } +} + +impl Iterator for IntervalNestedIter +where I: Iterator)>> + PageIterator + Send + Sync +{ + type Item = Result<(NestedState, Column)>; + + fn next(&mut self) -> Option { + match self.iter.next() { + Some(Ok((num_values, buffer))) => Some(self.deserialize(num_values, buffer)), + Some(Err(err)) => Some(Result::Err(err)), + None => None, + } + } + + fn nth(&mut self, n: usize) -> Option { + match self.iter.nth(n) { + Some(Ok((num_values, buffer))) => Some(self.deserialize(num_values, buffer)), + Some(Err(err)) => Some(Result::Err(err)), + None => None, + } + } +} + +pub fn read_nested_interval( + reader: &mut R, + data_type: TableDataType, + init: Vec, + page_metas: Vec, +) -> Result> { + let mut scratch = vec![]; + let mut results = Vec::with_capacity(page_metas.len()); + for page_meta in page_metas { + let num_values = page_meta.num_values as usize; + let (nested, validity) = read_nested(reader, &init, num_values)?; + + let mut values = Vec::with_capacity(num_values); + decompress_integer(reader, num_values, &mut values, &mut scratch)?; + + let column: Buffer = values.into(); + let column: Buffer = unsafe { std::mem::transmute(column) }; + let mut col = IntervalType::upcast_column(column); + if data_type.is_nullable() { + col = col.wrap_nullable(validity); + } + results.push((nested, col)); + } + Ok(results) +} diff --git a/src/common/native/src/read/array/mod.rs b/src/common/native/src/read/array/mod.rs index 2e424195b58b..cb30e5a9e70f 100644 --- a/src/common/native/src/read/array/mod.rs +++ b/src/common/native/src/read/array/mod.rs @@ -31,5 +31,7 @@ mod struct_; pub use struct_::*; mod list; pub use list::*; +mod 
interval; mod map; +pub use interval::*; pub use map::*; diff --git a/src/common/native/src/read/batch_read.rs b/src/common/native/src/read/batch_read.rs index 884573450b5e..095d9cc735e5 100644 --- a/src/common/native/src/read/batch_read.rs +++ b/src/common/native/src/read/batch_read.rs @@ -92,6 +92,16 @@ pub fn read_nested_column( page_metas.pop().unwrap(), )? } + Interval => { + init.push(InitNested::Primitive(is_nullable)); + + read_nested_interval::<_>( + &mut readers.pop().unwrap(), + data_type.clone(), + init, + page_metas.pop().unwrap(), + )? + } Timestamp => { init.push(InitNested::Primitive(is_nullable)); read_nested_integer::( diff --git a/src/common/native/src/read/deserialize.rs b/src/common/native/src/read/deserialize.rs index 71445bfd21ae..8e3e982c8f73 100644 --- a/src/common/native/src/read/deserialize.rs +++ b/src/common/native/src/read/deserialize.rs @@ -151,6 +151,14 @@ where init, )) } + TableDataType::Interval => { + init.push(InitNested::Primitive(is_nullable)); + DynIter::new(IntervalNestedIter::<_>::new( + readers.pop().unwrap(), + data_type.clone(), + init, + )) + } TableDataType::Decimal(t) if t.precision() > MAX_DECIMAL128_PRECISION => { init.push(InitNested::Primitive(is_nullable)); DynIter::new(DecimalNestedIter::< diff --git a/src/common/native/src/write/serialize.rs b/src/common/native/src/write/serialize.rs index 8239ae9777ee..92e051cd2a59 100644 --- a/src/common/native/src/write/serialize.rs +++ b/src/common/native/src/write/serialize.rs @@ -67,7 +67,10 @@ pub fn write( Column::Date(column) => { write_primitive::(w, &column, validity, write_options, scratch) } - + Column::Interval(column) => { + let column: Buffer = unsafe { std::mem::transmute(column) }; + write_primitive::(w, &column, validity, write_options, scratch) + } Column::Binary(b) | Column::Bitmap(b) | Column::Variant(b) diff --git a/src/meta/proto-conv/src/schema_from_to_protobuf_impl.rs b/src/meta/proto-conv/src/schema_from_to_protobuf_impl.rs index 0b64ecb91f14..3d110c52b6ce 100644 --- a/src/meta/proto-conv/src/schema_from_to_protobuf_impl.rs +++ b/src/meta/proto-conv/src/schema_from_to_protobuf_impl.rs @@ -232,6 +232,7 @@ impl FromToProto for ex::TableDataType { } Dt24::TimestampT(_) => ex::TableDataType::Timestamp, Dt24::DateT(_) => ex::TableDataType::Date, + Dt24::IntervalT(_) => ex::TableDataType::Interval, Dt24::NullableT(x) => ex::TableDataType::Nullable(Box::new( ex::TableDataType::from_pb(Box::into_inner(x))?, )), @@ -291,6 +292,7 @@ impl FromToProto for ex::TableDataType { } TableDataType::Timestamp => new_pb_dt24(Dt24::TimestampT(pb::Empty {})), TableDataType::Date => new_pb_dt24(Dt24::DateT(pb::Empty {})), + TableDataType::Interval => new_pb_dt24(Dt24::IntervalT(pb::Empty {})), TableDataType::Nullable(v) => { let x = v.to_pb()?; new_pb_dt24(Dt24::NullableT(Box::new(x))) diff --git a/src/meta/proto-conv/src/util.rs b/src/meta/proto-conv/src/util.rs index 1fd8a7ef5218..66ad6789451c 100644 --- a/src/meta/proto-conv/src/util.rs +++ b/src/meta/proto-conv/src/util.rs @@ -143,6 +143,7 @@ const META_CHANGE_LOG: &[(u64, &str)] = &[ (111, "2024-11-13: Add: Enable AWS Glue as an Apache Iceberg type when creating a catalog."), (112, "2024-11-28: Add: virtual_column add data_types field"), (113, "2024-12-10: Add: GrantWarehouseObject"), + (114, "2024-12-12: Add: New DataType Interval."), // Dear developer: // If you're gonna add a new metadata version, you'll have to add a test for it. 
// You could just copy an existing test file(e.g., `../tests/it/v024_table_meta.rs`) diff --git a/src/meta/proto-conv/tests/it/main.rs b/src/meta/proto-conv/tests/it/main.rs index 8264fc85ca56..f2641a1c17d5 100644 --- a/src/meta/proto-conv/tests/it/main.rs +++ b/src/meta/proto-conv/tests/it/main.rs @@ -111,3 +111,4 @@ mod v110_database_meta_gc_in_progress; mod v111_add_glue_as_iceberg_catalog_option; mod v112_virtual_column; mod v113_warehouse_grantobject; +mod v114_interval_datatype; diff --git a/src/meta/proto-conv/tests/it/v114_interval_datatype.rs b/src/meta/proto-conv/tests/it/v114_interval_datatype.rs new file mode 100644 index 000000000000..3516491b0c12 --- /dev/null +++ b/src/meta/proto-conv/tests/it/v114_interval_datatype.rs @@ -0,0 +1,225 @@ +// Copyright 2023 Datafuse Labs. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::sync::Arc; + +use chrono::TimeZone; +use chrono::Utc; +use databend_common_expression::types::decimal::DecimalSize; +use databend_common_expression::types::DecimalDataType; +use databend_common_expression::types::NumberDataType; +use databend_common_expression::TableDataType; +use databend_common_expression::TableField; +use databend_common_expression::TableSchema; +use databend_common_meta_app::schema as mt; +use fastrace::func_name; +use maplit::btreemap; +use maplit::btreeset; + +use crate::common; + +// These bytes are built when a new version in introduced, +// and are kept for backward compatibility test. +// +// ************************************************************* +// * These messages should never be updated, * +// * only be added when a new version is added, * +// * or be removed when an old version is no longer supported. 
* +// ************************************************************* +// +// The message bytes are built from the output of `test_pb_from_to()` +#[test] +fn test_decode_v114_schema() -> anyhow::Result<()> { + let table_schema_v114 = vec![ + 10, 28, 10, 1, 97, 26, 17, 154, 2, 8, 34, 0, 160, 6, 114, 168, 6, 24, 160, 6, 114, 168, 6, + 24, 160, 6, 114, 168, 6, 24, 10, 104, 10, 1, 98, 26, 91, 202, 2, 82, 10, 2, 98, 49, 10, 2, + 98, 50, 18, 47, 202, 2, 38, 10, 3, 98, 49, 49, 10, 3, 98, 49, 50, 18, 9, 138, 2, 0, 160, 6, + 114, 168, 6, 24, 18, 9, 146, 2, 0, 160, 6, 114, 168, 6, 24, 160, 6, 114, 168, 6, 24, 160, + 6, 114, 168, 6, 24, 18, 17, 154, 2, 8, 66, 0, 160, 6, 114, 168, 6, 24, 160, 6, 114, 168, 6, + 24, 160, 6, 114, 168, 6, 24, 160, 6, 114, 168, 6, 24, 32, 1, 160, 6, 114, 168, 6, 24, 10, + 30, 10, 1, 99, 26, 17, 154, 2, 8, 34, 0, 160, 6, 114, 168, 6, 24, 160, 6, 114, 168, 6, 24, + 32, 4, 160, 6, 114, 168, 6, 24, 10, 49, 10, 10, 100, 101, 99, 105, 109, 97, 108, 49, 50, + 56, 26, 27, 218, 2, 18, 10, 10, 8, 18, 16, 3, 160, 6, 114, 168, 6, 24, 160, 6, 114, 168, 6, + 24, 160, 6, 114, 168, 6, 24, 32, 5, 160, 6, 114, 168, 6, 24, 10, 49, 10, 10, 100, 101, 99, + 105, 109, 97, 108, 50, 53, 54, 26, 27, 218, 2, 18, 18, 10, 8, 46, 16, 6, 160, 6, 114, 168, + 6, 24, 160, 6, 114, 168, 6, 24, 160, 6, 114, 168, 6, 24, 32, 6, 160, 6, 114, 168, 6, 24, + 10, 30, 10, 9, 101, 109, 112, 116, 121, 95, 109, 97, 112, 26, 9, 226, 2, 0, 160, 6, 114, + 168, 6, 24, 32, 7, 160, 6, 114, 168, 6, 24, 10, 27, 10, 6, 98, 105, 116, 109, 97, 112, 26, + 9, 234, 2, 0, 160, 6, 114, 168, 6, 24, 32, 8, 160, 6, 114, 168, 6, 24, 10, 25, 10, 4, 103, + 101, 111, 109, 26, 9, 250, 2, 0, 160, 6, 114, 168, 6, 24, 32, 9, 160, 6, 114, 168, 6, 24, + 10, 29, 10, 8, 105, 110, 116, 101, 114, 118, 97, 108, 26, 9, 138, 3, 0, 160, 6, 114, 168, + 6, 24, 32, 10, 160, 6, 114, 168, 6, 24, 24, 11, 160, 6, 114, 168, 6, 24, + ]; + + let b1 = TableDataType::Tuple { + fields_name: vec!["b11".to_string(), "b12".to_string()], + fields_type: vec![TableDataType::Boolean, TableDataType::String], + }; + let b = TableDataType::Tuple { + fields_name: vec!["b1".to_string(), "b2".to_string()], + fields_type: vec![b1, TableDataType::Number(NumberDataType::Int64)], + }; + let fields = vec![ + TableField::new("a", TableDataType::Number(NumberDataType::UInt64)), + TableField::new("b", b), + TableField::new("c", TableDataType::Number(NumberDataType::UInt64)), + TableField::new( + "decimal128", + TableDataType::Decimal(DecimalDataType::Decimal128(DecimalSize { + precision: 18, + scale: 3, + })), + ), + TableField::new( + "decimal256", + TableDataType::Decimal(DecimalDataType::Decimal256(DecimalSize { + precision: 46, + scale: 6, + })), + ), + TableField::new("empty_map", TableDataType::EmptyMap), + TableField::new("bitmap", TableDataType::Bitmap), + TableField::new("geom", TableDataType::Geometry), + TableField::new("interval", TableDataType::Interval), + ]; + let want = || TableSchema::new(fields.clone()); + common::test_pb_from_to(func_name!(), want())?; + common::test_load_old(func_name!(), table_schema_v114.as_slice(), 114, want())?; + Ok(()) +} + +#[test] +fn test_decode_v114_table_meta() -> anyhow::Result<()> { + let table_meta_v114 = vec![ + 10, 235, 6, 10, 51, 10, 8, 110, 117, 108, 108, 97, 98, 108, 101, 18, 5, 97, 32, 43, 32, 51, + 26, 26, 178, 2, 17, 154, 2, 8, 42, 0, 160, 6, 114, 168, 6, 24, 160, 6, 114, 168, 6, 24, + 160, 6, 114, 168, 6, 24, 160, 6, 114, 168, 6, 24, 10, 25, 10, 4, 98, 111, 111, 108, 26, 9, + 138, 2, 0, 160, 6, 114, 168, 6, 24, 32, 1, 160, 6, 114, 
168, 6, 24, 10, 33, 10, 4, 105, + 110, 116, 56, 26, 17, 154, 2, 8, 42, 0, 160, 6, 114, 168, 6, 24, 160, 6, 114, 168, 6, 24, + 32, 2, 160, 6, 114, 168, 6, 24, 10, 34, 10, 5, 105, 110, 116, 49, 54, 26, 17, 154, 2, 8, + 50, 0, 160, 6, 114, 168, 6, 24, 160, 6, 114, 168, 6, 24, 32, 3, 160, 6, 114, 168, 6, 24, + 10, 34, 10, 5, 105, 110, 116, 51, 50, 26, 17, 154, 2, 8, 58, 0, 160, 6, 114, 168, 6, 24, + 160, 6, 114, 168, 6, 24, 32, 4, 160, 6, 114, 168, 6, 24, 10, 34, 10, 5, 105, 110, 116, 54, + 52, 26, 17, 154, 2, 8, 66, 0, 160, 6, 114, 168, 6, 24, 160, 6, 114, 168, 6, 24, 32, 5, 160, + 6, 114, 168, 6, 24, 10, 34, 10, 5, 117, 105, 110, 116, 56, 26, 17, 154, 2, 8, 10, 0, 160, + 6, 114, 168, 6, 24, 160, 6, 114, 168, 6, 24, 32, 6, 160, 6, 114, 168, 6, 24, 10, 35, 10, 6, + 117, 105, 110, 116, 49, 54, 26, 17, 154, 2, 8, 18, 0, 160, 6, 114, 168, 6, 24, 160, 6, 114, + 168, 6, 24, 32, 7, 160, 6, 114, 168, 6, 24, 10, 35, 10, 6, 117, 105, 110, 116, 51, 50, 26, + 17, 154, 2, 8, 26, 0, 160, 6, 114, 168, 6, 24, 160, 6, 114, 168, 6, 24, 32, 8, 160, 6, 114, + 168, 6, 24, 10, 35, 10, 6, 117, 105, 110, 116, 54, 52, 26, 17, 154, 2, 8, 34, 0, 160, 6, + 114, 168, 6, 24, 160, 6, 114, 168, 6, 24, 32, 9, 160, 6, 114, 168, 6, 24, 10, 36, 10, 7, + 102, 108, 111, 97, 116, 51, 50, 26, 17, 154, 2, 8, 74, 0, 160, 6, 114, 168, 6, 24, 160, 6, + 114, 168, 6, 24, 32, 10, 160, 6, 114, 168, 6, 24, 10, 36, 10, 7, 102, 108, 111, 97, 116, + 54, 52, 26, 17, 154, 2, 8, 82, 0, 160, 6, 114, 168, 6, 24, 160, 6, 114, 168, 6, 24, 32, 11, + 160, 6, 114, 168, 6, 24, 10, 25, 10, 4, 100, 97, 116, 101, 26, 9, 170, 2, 0, 160, 6, 114, + 168, 6, 24, 32, 12, 160, 6, 114, 168, 6, 24, 10, 30, 10, 9, 116, 105, 109, 101, 115, 116, + 97, 109, 112, 26, 9, 162, 2, 0, 160, 6, 114, 168, 6, 24, 32, 13, 160, 6, 114, 168, 6, 24, + 10, 27, 10, 6, 115, 116, 114, 105, 110, 103, 26, 9, 146, 2, 0, 160, 6, 114, 168, 6, 24, 32, + 14, 160, 6, 114, 168, 6, 24, 10, 65, 10, 6, 115, 116, 114, 117, 99, 116, 26, 47, 202, 2, + 38, 10, 3, 102, 111, 111, 10, 3, 98, 97, 114, 18, 9, 138, 2, 0, 160, 6, 114, 168, 6, 24, + 18, 9, 146, 2, 0, 160, 6, 114, 168, 6, 24, 160, 6, 114, 168, 6, 24, 160, 6, 114, 168, 6, + 24, 32, 15, 160, 6, 114, 168, 6, 24, 10, 35, 10, 5, 97, 114, 114, 97, 121, 26, 18, 186, 2, + 9, 138, 2, 0, 160, 6, 114, 168, 6, 24, 160, 6, 114, 168, 6, 24, 32, 17, 160, 6, 114, 168, + 6, 24, 10, 28, 10, 7, 118, 97, 114, 105, 97, 110, 116, 26, 9, 210, 2, 0, 160, 6, 114, 168, + 6, 24, 32, 18, 160, 6, 114, 168, 6, 24, 10, 34, 10, 13, 118, 97, 114, 105, 97, 110, 116, + 95, 97, 114, 114, 97, 121, 26, 9, 210, 2, 0, 160, 6, 114, 168, 6, 24, 32, 19, 160, 6, 114, + 168, 6, 24, 10, 35, 10, 14, 118, 97, 114, 105, 97, 110, 116, 95, 111, 98, 106, 101, 99, + 116, 26, 9, 210, 2, 0, 160, 6, 114, 168, 6, 24, 32, 20, 160, 6, 114, 168, 6, 24, 10, 29, + 10, 8, 105, 110, 116, 101, 114, 118, 97, 108, 26, 9, 250, 1, 0, 160, 6, 114, 168, 6, 24, + 32, 21, 160, 6, 114, 168, 6, 24, 10, 27, 10, 6, 98, 105, 116, 109, 97, 112, 26, 9, 234, 2, + 0, 160, 6, 114, 168, 6, 24, 32, 22, 160, 6, 114, 168, 6, 24, 10, 25, 10, 4, 103, 101, 111, + 109, 26, 9, 250, 2, 0, 160, 6, 114, 168, 6, 24, 32, 23, 160, 6, 114, 168, 6, 24, 10, 29, + 10, 8, 105, 110, 116, 101, 114, 118, 97, 108, 26, 9, 138, 3, 0, 160, 6, 114, 168, 6, 24, + 32, 24, 160, 6, 114, 168, 6, 24, 18, 6, 10, 1, 97, 18, 1, 98, 24, 25, 160, 6, 114, 168, 6, + 24, 34, 10, 40, 97, 32, 43, 32, 50, 44, 32, 98, 41, 42, 10, 10, 3, 120, 121, 122, 18, 3, + 102, 111, 111, 50, 2, 52, 52, 58, 10, 10, 3, 97, 98, 99, 18, 3, 100, 101, 102, 64, 0, 74, + 10, 40, 97, 32, 43, 
32, 50, 44, 32, 98, 41, 162, 1, 23, 50, 48, 49, 52, 45, 49, 49, 45, 50, + 56, 32, 49, 50, 58, 48, 48, 58, 48, 57, 32, 85, 84, 67, 170, 1, 23, 50, 48, 49, 52, 45, 49, + 49, 45, 50, 57, 32, 49, 50, 58, 48, 48, 58, 49, 48, 32, 85, 84, 67, 178, 1, 13, 116, 97, + 98, 108, 101, 95, 99, 111, 109, 109, 101, 110, 116, 186, 1, 6, 160, 6, 114, 168, 6, 24, + 202, 1, 1, 99, 202, 1, 1, 99, 202, 1, 1, 99, 202, 1, 1, 99, 202, 1, 1, 99, 202, 1, 1, 99, + 202, 1, 1, 99, 202, 1, 1, 99, 202, 1, 1, 99, 202, 1, 1, 99, 202, 1, 1, 99, 202, 1, 1, 99, + 202, 1, 1, 99, 202, 1, 1, 99, 202, 1, 1, 99, 202, 1, 1, 99, 202, 1, 1, 99, 202, 1, 1, 99, + 202, 1, 1, 99, 202, 1, 1, 99, 202, 1, 1, 99, 226, 1, 1, 1, 234, 1, 6, 10, 1, 97, 18, 1, 98, + 160, 6, 114, 168, 6, 24, + ]; + + let want = || mt::TableMeta { + schema: Arc::new(TableSchema::new_from( + vec![ + TableField::new( + "nullable", + TableDataType::Nullable(Box::new(TableDataType::Number(NumberDataType::Int8))), + ) + .with_default_expr(Some("a + 3".to_string())), + TableField::new("bool", TableDataType::Boolean), + TableField::new("int8", TableDataType::Number(NumberDataType::Int8)), + TableField::new("int16", TableDataType::Number(NumberDataType::Int16)), + TableField::new("int32", TableDataType::Number(NumberDataType::Int32)), + TableField::new("int64", TableDataType::Number(NumberDataType::Int64)), + TableField::new("uint8", TableDataType::Number(NumberDataType::UInt8)), + TableField::new("uint16", TableDataType::Number(NumberDataType::UInt16)), + TableField::new("uint32", TableDataType::Number(NumberDataType::UInt32)), + TableField::new("uint64", TableDataType::Number(NumberDataType::UInt64)), + TableField::new("float32", TableDataType::Number(NumberDataType::Float32)), + TableField::new("float64", TableDataType::Number(NumberDataType::Float64)), + TableField::new("date", TableDataType::Date), + TableField::new("timestamp", TableDataType::Timestamp), + TableField::new("string", TableDataType::String), + TableField::new("struct", TableDataType::Tuple { + fields_name: vec![s("foo"), s("bar")], + fields_type: vec![TableDataType::Boolean, TableDataType::String], + }), + TableField::new( + "array", + TableDataType::Array(Box::new(TableDataType::Boolean)), + ), + TableField::new("variant", TableDataType::Variant), + TableField::new("variant_array", TableDataType::Variant), + TableField::new("variant_object", TableDataType::Variant), + // NOTE: It is safe to convert Interval to NULL, because `Interval` is never really used. + TableField::new("interval", TableDataType::Null), + TableField::new("bitmap", TableDataType::Bitmap), + TableField::new("geom", TableDataType::Geometry), + TableField::new("interval", TableDataType::Interval), + ], + btreemap! {s("a") => s("b")}, + )), + engine: "44".to_string(), + storage_params: None, + part_prefix: "".to_string(), + engine_options: btreemap! {s("abc") => s("def")}, + options: btreemap! {s("xyz") => s("foo")}, + default_cluster_key: Some("(a + 2, b)".to_string()), + cluster_keys: vec!["(a + 2, b)".to_string()], + default_cluster_key_id: Some(0), + created_on: Utc.with_ymd_and_hms(2014, 11, 28, 12, 0, 9).unwrap(), + updated_on: Utc.with_ymd_and_hms(2014, 11, 29, 12, 0, 10).unwrap(), + comment: s("table_comment"), + field_comments: vec!["c".to_string(); 21], + drop_on: None, + statistics: Default::default(), + shared_by: btreeset! {1}, + column_mask_policy: Some(btreemap! {s("a") => s("b")}), + indexes: btreemap! 
{}, + }; + common::test_pb_from_to(func_name!(), want())?; + common::test_load_old(func_name!(), table_meta_v114.as_slice(), 114, want())?; + + Ok(()) +} + +fn s(ss: impl ToString) -> String { + ss.to_string() +} diff --git a/src/meta/protos/proto/datatype.proto b/src/meta/protos/proto/datatype.proto index ba81e97f90fc..b5ff9dd570c4 100644 --- a/src/meta/protos/proto/datatype.proto +++ b/src/meta/protos/proto/datatype.proto @@ -66,6 +66,7 @@ message DataType { Empty bitmap_t = 45; Empty geometry_t = 47; Empty geography_t = 48; + Empty interval_t = 49; } } diff --git a/src/query/ast/src/ast/expr.rs b/src/query/ast/src/ast/expr.rs index d19f00b2278b..4eda49a734c9 100644 --- a/src/query/ast/src/ast/expr.rs +++ b/src/query/ast/src/ast/expr.rs @@ -1021,6 +1021,7 @@ pub enum TypeName { Variant, Geometry, Geography, + Interval, Nullable(Box), NotNull(Box), } @@ -1149,6 +1150,9 @@ impl Display for TypeName { TypeName::NotNull(ty) => { write!(f, "{} NOT NULL", ty)?; } + TypeName::Interval => { + write!(f, "INTERVAL")?; + } } Ok(()) } diff --git a/src/query/ast/src/parser/expr.rs b/src/query/ast/src/parser/expr.rs index a1523d4d66ab..a7fd524f26dc 100644 --- a/src/query/ast/src/parser/expr.rs +++ b/src/query/ast/src/parser/expr.rs @@ -1698,6 +1698,7 @@ pub fn type_name(i: Input) -> IResult { }, ); let ty_date = value(TypeName::Date, rule! { DATE }); + let ty_interval = value(TypeName::Interval, rule! { INTERVAL }); let ty_datetime = map( rule! { ( DATETIME | TIMESTAMP ) ~ ( "(" ~ ^#literal_u64 ~ ^")" )? }, |(_, _)| TypeName::Timestamp, @@ -1738,6 +1739,7 @@ pub fn type_name(i: Input) -> IResult { rule! { ( #ty_date | #ty_datetime + | #ty_interval | #ty_binary | #ty_string | #ty_variant diff --git a/src/query/ast/tests/it/testdata/expr-error.txt b/src/query/ast/tests/it/testdata/expr-error.txt index 01717c62edfc..4e0216bf983a 100644 --- a/src/query/ast/tests/it/testdata/expr-error.txt +++ b/src/query/ast/tests/it/testdata/expr-error.txt @@ -29,7 +29,7 @@ error: --> SQL:1:14 | 1 | CAST(col1 AS foo) - | ---- ^^^ unexpected `foo`, expecting `BOOL`, `FLOAT`, `BOOLEAN`, `FLOAT32`, `FLOAT64`, `BLOB`, `JSON`, `DOUBLE`, `LONGBLOB`, `GEOMETRY`, `GEOGRAPHY`, `UINT8`, `TINYINT`, `UINT16`, `SMALLINT`, `UINT32`, `INT`, `INTEGER`, `UINT64`, `UNSIGNED`, `BIGINT`, `INT8`, `INT16`, `INT32`, `INT64`, `SIGNED`, `DECIMAL`, `ARRAY`, `MAP`, `BITMAP`, `TUPLE`, `DATE`, `DATETIME`, `TIMESTAMP`, `BINARY`, `VARBINARY`, `MEDIUMBLOB`, `TINYBLOB`, `STRING`, `VARCHAR`, `CHAR`, `CHARACTER`, `TEXT`, `VARIANT`, or `NULLABLE` + | ---- ^^^ unexpected `foo`, expecting `BOOL`, `FLOAT`, `BOOLEAN`, `FLOAT32`, `FLOAT64`, `BLOB`, `JSON`, `DOUBLE`, `LONGBLOB`, `GEOMETRY`, `GEOGRAPHY`, `UINT8`, `TINYINT`, `UINT16`, `SMALLINT`, `UINT32`, `INT`, `INTEGER`, `UINT64`, `UNSIGNED`, `BIGINT`, `INT8`, `INT16`, `INT32`, `INT64`, `SIGNED`, `DECIMAL`, `ARRAY`, `MAP`, `BITMAP`, `TUPLE`, `DATE`, `DATETIME`, `TIMESTAMP`, `INTERVAL`, `BINARY`, `VARBINARY`, `MEDIUMBLOB`, `TINYBLOB`, `STRING`, `VARCHAR`, `CHAR`, `CHARACTER`, `TEXT`, `VARIANT`, or `NULLABLE` | | | while parsing `CAST(... 
AS ...)` | while parsing expression diff --git a/src/query/ast/tests/it/testdata/stmt-error.txt b/src/query/ast/tests/it/testdata/stmt-error.txt index 7d954117f30a..9c53c0f64c52 100644 --- a/src/query/ast/tests/it/testdata/stmt-error.txt +++ b/src/query/ast/tests/it/testdata/stmt-error.txt @@ -29,7 +29,7 @@ error: --> SQL:1:19 | 1 | create table a (c varch) - | ------ - ^^^^^ unexpected `varch`, expecting `VARCHAR`, `CHAR`, `VARIANT`, `CHARACTER`, `VARBINARY`, `ARRAY`, `BINARY`, `GEOGRAPHY`, `MAP`, `DATE`, `STRING`, `FLOAT32`, `FLOAT64`, `DECIMAL`, `SMALLINT`, `DATETIME`, `NULLABLE`, `BOOLEAN`, `BOOL`, `UINT8`, `TINYINT`, `UINT16`, `UINT32`, `INT`, `INTEGER`, `UINT64`, `UNSIGNED`, `BIGINT`, `INT8`, `INT16`, `INT32`, `INT64`, `SIGNED`, `FLOAT`, `DOUBLE`, `BITMAP`, `TUPLE`, `TIMESTAMP`, `LONGBLOB`, `MEDIUMBLOB`, `TINYBLOB`, `BLOB`, `TEXT`, `JSON`, or `GEOMETRY` + | ------ - ^^^^^ unexpected `varch`, expecting `VARCHAR`, `CHAR`, `VARIANT`, `CHARACTER`, `VARBINARY`, `ARRAY`, `BINARY`, `GEOGRAPHY`, `MAP`, `DATE`, `STRING`, `FLOAT32`, `FLOAT64`, `DECIMAL`, `SMALLINT`, `DATETIME`, `INTERVAL`, `NULLABLE`, `BOOLEAN`, `BOOL`, `UINT8`, `TINYINT`, `UINT16`, `UINT32`, `INT`, `INTEGER`, `UINT64`, `UNSIGNED`, `BIGINT`, `INT8`, `INT16`, `INT32`, `INT64`, `SIGNED`, `FLOAT`, `DOUBLE`, `BITMAP`, `TUPLE`, `TIMESTAMP`, `LONGBLOB`, `MEDIUMBLOB`, `TINYBLOB`, `BLOB`, `TEXT`, `JSON`, or `GEOMETRY` | | | | | while parsing ` [DEFAULT ] [AS () VIRTUAL] [AS () STORED] [COMMENT '']` | while parsing `CREATE [OR REPLACE] TABLE [IF NOT EXISTS] [.] [] []` @@ -42,7 +42,7 @@ error: --> SQL:1:25 | 1 | create table a (c tuple()) - | ------ - ----- ^ unexpected `)`, expecting `BOOLEAN`, `BOOL`, `UINT8`, `TINYINT`, `UINT16`, `SMALLINT`, `UINT32`, `INT`, `INTEGER`, `UINT64`, `UNSIGNED`, `BIGINT`, `INT8`, `INT16`, `INT32`, `INT64`, `SIGNED`, `FLOAT32`, `FLOAT`, `FLOAT64`, `DOUBLE`, `DECIMAL`, `ARRAY`, `MAP`, `BITMAP`, `TUPLE`, `DATE`, `DATETIME`, `TIMESTAMP`, `BINARY`, `VARBINARY`, `LONGBLOB`, `MEDIUMBLOB`, `TINYBLOB`, `BLOB`, `STRING`, `VARCHAR`, `CHAR`, `CHARACTER`, `TEXT`, `VARIANT`, `JSON`, `GEOMETRY`, `GEOGRAPHY`, `NULLABLE`, , , or `IDENTIFIER` + | ------ - ----- ^ unexpected `)`, expecting `BOOLEAN`, `BOOL`, `UINT8`, `TINYINT`, `UINT16`, `SMALLINT`, `UINT32`, `INT`, `INTEGER`, `UINT64`, `UNSIGNED`, `BIGINT`, `INT8`, `INT16`, `INT32`, `INT64`, `SIGNED`, `FLOAT32`, `FLOAT`, `FLOAT64`, `DOUBLE`, `DECIMAL`, `ARRAY`, `MAP`, `BITMAP`, `TUPLE`, `DATE`, `DATETIME`, `TIMESTAMP`, `INTERVAL`, `BINARY`, `VARBINARY`, `LONGBLOB`, `MEDIUMBLOB`, `TINYBLOB`, `BLOB`, `STRING`, `VARCHAR`, `CHAR`, `CHARACTER`, `TEXT`, `VARIANT`, `JSON`, `GEOMETRY`, `GEOGRAPHY`, `NULLABLE`, , , or `IDENTIFIER` | | | | | | | while parsing type name | | while parsing ` [DEFAULT ] [AS () VIRTUAL] [AS () STORED] [COMMENT '']` @@ -70,7 +70,7 @@ error: --> SQL:1:38 | 1 | create table a (b tuple(c int, uint64)); - | ------ - ----- ^ unexpected `)`, expecting `BOOLEAN`, `BOOL`, `UINT8`, `TINYINT`, `UINT16`, `SMALLINT`, `UINT32`, `INT`, `INTEGER`, `UINT64`, `UNSIGNED`, `BIGINT`, `INT8`, `INT16`, `INT32`, `INT64`, `SIGNED`, `FLOAT32`, `FLOAT`, `FLOAT64`, `DOUBLE`, `DECIMAL`, `ARRAY`, `MAP`, `BITMAP`, `TUPLE`, `DATE`, `DATETIME`, `TIMESTAMP`, `BINARY`, `VARBINARY`, `LONGBLOB`, `MEDIUMBLOB`, `TINYBLOB`, `BLOB`, `STRING`, `VARCHAR`, `CHAR`, `CHARACTER`, `TEXT`, `VARIANT`, `JSON`, `GEOMETRY`, `GEOGRAPHY`, or `NULLABLE` + | ------ - ----- ^ unexpected `)`, expecting `BOOLEAN`, `BOOL`, `UINT8`, `TINYINT`, `UINT16`, `SMALLINT`, `UINT32`, `INT`, `INTEGER`, `UINT64`, `UNSIGNED`, 
`BIGINT`, `INT8`, `INT16`, `INT32`, `INT64`, `SIGNED`, `FLOAT32`, `FLOAT`, `FLOAT64`, `DOUBLE`, `DECIMAL`, `ARRAY`, `MAP`, `BITMAP`, `TUPLE`, `DATE`, `DATETIME`, `TIMESTAMP`, `INTERVAL`, `BINARY`, `VARBINARY`, `LONGBLOB`, `MEDIUMBLOB`, `TINYBLOB`, `BLOB`, `STRING`, `VARCHAR`, `CHAR`, `CHARACTER`, `TEXT`, `VARIANT`, `JSON`, `GEOMETRY`, `GEOGRAPHY`, or `NULLABLE` | | | | | | | while parsing TUPLE( , ...) | | | while parsing type name @@ -955,7 +955,7 @@ error: | ------ while parsing `CREATE [OR REPLACE] DICTIONARY [IF NOT EXISTS] [(, ...)] PRIMARY KEY [, ...] SOURCE ( ([])) [COMMENT ] ` 2 | ( 3 | user_name tuple(), - | --------- ----- ^ unexpected `)`, expecting `BOOLEAN`, `BOOL`, `UINT8`, `TINYINT`, `UINT16`, `SMALLINT`, `UINT32`, `INT`, `INTEGER`, `UINT64`, `UNSIGNED`, `BIGINT`, `INT8`, `INT16`, `INT32`, `INT64`, `SIGNED`, `FLOAT32`, `FLOAT`, `FLOAT64`, `DOUBLE`, `DECIMAL`, `ARRAY`, `MAP`, `BITMAP`, `TUPLE`, `DATE`, `DATETIME`, `TIMESTAMP`, `BINARY`, `VARBINARY`, `LONGBLOB`, `MEDIUMBLOB`, `TINYBLOB`, `BLOB`, `STRING`, `VARCHAR`, `CHAR`, `CHARACTER`, `TEXT`, `VARIANT`, `JSON`, `GEOMETRY`, `GEOGRAPHY`, `NULLABLE`, , , or `IDENTIFIER` + | --------- ----- ^ unexpected `)`, expecting `BOOLEAN`, `BOOL`, `UINT8`, `TINYINT`, `UINT16`, `SMALLINT`, `UINT32`, `INT`, `INTEGER`, `UINT64`, `UNSIGNED`, `BIGINT`, `INT8`, `INT16`, `INT32`, `INT64`, `SIGNED`, `FLOAT32`, `FLOAT`, `FLOAT64`, `DOUBLE`, `DECIMAL`, `ARRAY`, `MAP`, `BITMAP`, `TUPLE`, `DATE`, `DATETIME`, `TIMESTAMP`, `INTERVAL`, `BINARY`, `VARBINARY`, `LONGBLOB`, `MEDIUMBLOB`, `TINYBLOB`, `BLOB`, `STRING`, `VARCHAR`, `CHAR`, `CHARACTER`, `TEXT`, `VARIANT`, `JSON`, `GEOMETRY`, `GEOGRAPHY`, `NULLABLE`, , , or `IDENTIFIER` | | | | | while parsing type name | while parsing ` [DEFAULT ] [AS () VIRTUAL] [AS () STORED] [COMMENT '']` @@ -1006,7 +1006,7 @@ error: --> SQL:1:19 | 1 | drop procedure p1(a int) - | ---- ^ unexpected `a`, expecting `DATE`, `ARRAY`, `VARCHAR`, `VARIANT`, `SMALLINT`, `DATETIME`, `VARBINARY`, `CHARACTER`, `)`, `BOOLEAN`, `BOOL`, `UINT8`, `TINYINT`, `UINT16`, `UINT32`, `INT`, `INTEGER`, `UINT64`, `UNSIGNED`, `BIGINT`, `INT8`, `INT16`, `INT32`, `INT64`, `SIGNED`, `FLOAT32`, `FLOAT`, `FLOAT64`, `DOUBLE`, `DECIMAL`, `MAP`, `BITMAP`, `TUPLE`, `TIMESTAMP`, `BINARY`, `LONGBLOB`, `MEDIUMBLOB`, `TINYBLOB`, `BLOB`, `STRING`, `CHAR`, `TEXT`, `JSON`, `GEOMETRY`, `GEOGRAPHY`, or `NULLABLE` + | ---- ^ unexpected `a`, expecting `DATE`, `ARRAY`, `VARCHAR`, `VARIANT`, `SMALLINT`, `DATETIME`, `VARBINARY`, `CHARACTER`, `)`, `BOOLEAN`, `BOOL`, `UINT8`, `TINYINT`, `UINT16`, `UINT32`, `INT`, `INTEGER`, `UINT64`, `UNSIGNED`, `BIGINT`, `INT8`, `INT16`, `INT32`, `INT64`, `SIGNED`, `FLOAT32`, `FLOAT`, `FLOAT64`, `DOUBLE`, `DECIMAL`, `MAP`, `BITMAP`, `TUPLE`, `TIMESTAMP`, `INTERVAL`, `BINARY`, `LONGBLOB`, `MEDIUMBLOB`, `TINYBLOB`, `BLOB`, `STRING`, `CHAR`, `TEXT`, `JSON`, `GEOMETRY`, `GEOGRAPHY`, or `NULLABLE` | | | while parsing `DROP PROCEDURE ()` @@ -1038,7 +1038,7 @@ error: --> SQL:1:44 | 1 | create PROCEDURE p1() returns table(string not null, int null) language sql comment = 'test' as $$ - | ------ ----- ^^^ unexpected `not`, expecting `INT8`, `INT16`, `INT32`, `INT64`, `UINT16`, `UINT32`, `UINT64`, `INTEGER`, `FLOAT32`, `FLOAT64`, `GEOMETRY`, `INT`, `BOOL`, `DATE`, `BLOB`, `TEXT`, `JSON`, `UINT8`, `FLOAT`, `TUPLE`, `DOUBLE`, `BITMAP`, `BINARY`, `STRING`, `BOOLEAN`, `UNSIGNED`, `DATETIME`, `NULLABLE`, `TIMESTAMP`, `GEOGRAPHY`, `TINYINT`, `LONGBLOB`, `TINYBLOB`, `SMALLINT`, `BIGINT`, `SIGNED`, `DECIMAL`, `ARRAY`, `MAP`, `VARBINARY`, `MEDIUMBLOB`, 
`VARCHAR`, `CHAR`, `CHARACTER`, or `VARIANT` + | ------ ----- ^^^ unexpected `not`, expecting `INT8`, `INT16`, `INT32`, `INT64`, `UINT16`, `UINT32`, `UINT64`, `INTEGER`, `FLOAT32`, `FLOAT64`, `INTERVAL`, `GEOMETRY`, `INT`, `BOOL`, `DATE`, `BLOB`, `TEXT`, `JSON`, `UINT8`, `FLOAT`, `TUPLE`, `DOUBLE`, `BITMAP`, `BINARY`, `STRING`, `BOOLEAN`, `UNSIGNED`, `DATETIME`, `NULLABLE`, `TIMESTAMP`, `GEOGRAPHY`, `TINYINT`, `LONGBLOB`, `TINYBLOB`, `SMALLINT`, `BIGINT`, `SIGNED`, `DECIMAL`, `ARRAY`, `MAP`, `VARBINARY`, `MEDIUMBLOB`, `VARCHAR`, `CHAR`, `CHARACTER`, or `VARIANT` | | | | | while parsing TABLE( , ...) | while parsing `CREATE [ OR REPLACE ] PROCEDURE () RETURNS { [ NOT NULL ] | TABLE( , ...)} LANGUAGE SQL [ COMMENT = '' ] AS ` @@ -1059,7 +1059,7 @@ error: --> SQL:1:24 | 1 | create PROCEDURE p1(int, string) returns table(string not null, int null) language sql comment = 'test' as $$ - | ------ - ^ unexpected `,`, expecting `BOOLEAN`, `BOOL`, `UINT8`, `TINYINT`, `UINT16`, `SMALLINT`, `UINT32`, `INT`, `INTEGER`, `UINT64`, `UNSIGNED`, `BIGINT`, `INT8`, `INT16`, `INT32`, `INT64`, `SIGNED`, `FLOAT32`, `FLOAT`, `FLOAT64`, `DOUBLE`, `DECIMAL`, `ARRAY`, `MAP`, `BITMAP`, `TUPLE`, `DATE`, `DATETIME`, `TIMESTAMP`, `BINARY`, `VARBINARY`, `LONGBLOB`, `MEDIUMBLOB`, `TINYBLOB`, `BLOB`, `STRING`, `VARCHAR`, `CHAR`, `CHARACTER`, `TEXT`, `VARIANT`, `JSON`, `GEOMETRY`, `GEOGRAPHY`, or `NULLABLE` + | ------ - ^ unexpected `,`, expecting `BOOLEAN`, `BOOL`, `UINT8`, `TINYINT`, `UINT16`, `SMALLINT`, `UINT32`, `INT`, `INTEGER`, `UINT64`, `UNSIGNED`, `BIGINT`, `INT8`, `INT16`, `INT32`, `INT64`, `SIGNED`, `FLOAT32`, `FLOAT`, `FLOAT64`, `DOUBLE`, `DECIMAL`, `ARRAY`, `MAP`, `BITMAP`, `TUPLE`, `DATE`, `DATETIME`, `TIMESTAMP`, `INTERVAL`, `BINARY`, `VARBINARY`, `LONGBLOB`, `MEDIUMBLOB`, `TINYBLOB`, `BLOB`, `STRING`, `VARCHAR`, `CHAR`, `CHARACTER`, `TEXT`, `VARIANT`, `JSON`, `GEOMETRY`, `GEOGRAPHY`, or `NULLABLE` | | | | | while parsing ( , ...) 
| while parsing `CREATE [ OR REPLACE ] PROCEDURE () RETURNS { [ NOT NULL ] | TABLE( , ...)} LANGUAGE SQL [ COMMENT = '' ] AS ` diff --git a/src/query/expression/src/aggregate/payload_row.rs b/src/query/expression/src/aggregate/payload_row.rs index 4c81a1371fd6..ce8b908e0b5a 100644 --- a/src/query/expression/src/aggregate/payload_row.rs +++ b/src/query/expression/src/aggregate/payload_row.rs @@ -53,6 +53,7 @@ pub fn rowformat_size(data_type: &DataType) -> usize { }, DataType::Timestamp => 8, DataType::Date => 4, + DataType::Interval => 16, // use address instead DataType::Binary | DataType::String diff --git a/src/query/expression/src/converts/arrow/from.rs b/src/query/expression/src/converts/arrow/from.rs index c081fd9e098b..b8e342e1151f 100644 --- a/src/query/expression/src/converts/arrow/from.rs +++ b/src/query/expression/src/converts/arrow/from.rs @@ -14,6 +14,7 @@ use std::sync::Arc; +use arrow_array::Array; use arrow_array::ArrayRef; use arrow_array::RecordBatch; use arrow_schema::DataType as ArrowDataType; @@ -23,6 +24,7 @@ use databend_common_column::binary::BinaryColumn; use databend_common_column::binview::StringColumn; use databend_common_column::bitmap::Bitmap; use databend_common_column::buffer::Buffer; +use databend_common_column::types::months_days_micros; use databend_common_exception::ErrorCode; use databend_common_exception::Result; @@ -31,6 +33,7 @@ use super::ARROW_EXT_TYPE_EMPTY_ARRAY; use super::ARROW_EXT_TYPE_EMPTY_MAP; use super::ARROW_EXT_TYPE_GEOGRAPHY; use super::ARROW_EXT_TYPE_GEOMETRY; +use super::ARROW_EXT_TYPE_INTERVAL; use super::ARROW_EXT_TYPE_VARIANT; use super::EXTENSION_KEY; use crate::types::ArrayColumn; @@ -72,6 +75,7 @@ impl TryFrom<&Field> for TableField { ARROW_EXT_TYPE_VARIANT => TableDataType::Variant, ARROW_EXT_TYPE_GEOMETRY => TableDataType::Geometry, ARROW_EXT_TYPE_GEOGRAPHY => TableDataType::Geography, + ARROW_EXT_TYPE_INTERVAL => TableDataType::Interval, _ => match arrow_f.data_type() { ArrowDataType::Null => TableDataType::Null, ArrowDataType::Boolean => TableDataType::Boolean, @@ -258,6 +262,12 @@ impl Column { let buffer: Buffer = array.to_data().buffers()[0].clone().into(); Column::Date(buffer) } + DataType::Interval => { + let array = arrow_cast::cast(array.as_ref(), &ArrowDataType::Decimal128(38, 0))?; + let buffer: Buffer = + array.to_data().buffers()[0].clone().into(); + Column::Interval(buffer) + } DataType::Nullable(_) => { let validity = match array.nulls() { Some(nulls) => Bitmap::from_null_buffer(nulls.clone()), diff --git a/src/query/expression/src/converts/arrow/mod.rs b/src/query/expression/src/converts/arrow/mod.rs index bacab47f7a70..0dce3e3dae1d 100644 --- a/src/query/expression/src/converts/arrow/mod.rs +++ b/src/query/expression/src/converts/arrow/mod.rs @@ -22,3 +22,4 @@ pub const ARROW_EXT_TYPE_VARIANT: &str = "Variant"; pub const ARROW_EXT_TYPE_BITMAP: &str = "Bitmap"; pub const ARROW_EXT_TYPE_GEOMETRY: &str = "Geometry"; pub const ARROW_EXT_TYPE_GEOGRAPHY: &str = "Geography"; +pub const ARROW_EXT_TYPE_INTERVAL: &str = "Interval"; diff --git a/src/query/expression/src/converts/arrow/to.rs b/src/query/expression/src/converts/arrow/to.rs index b2c486a7f156..0a2cb329a66a 100644 --- a/src/query/expression/src/converts/arrow/to.rs +++ b/src/query/expression/src/converts/arrow/to.rs @@ -33,6 +33,7 @@ use super::ARROW_EXT_TYPE_EMPTY_ARRAY; use super::ARROW_EXT_TYPE_EMPTY_MAP; use super::ARROW_EXT_TYPE_GEOGRAPHY; use super::ARROW_EXT_TYPE_GEOMETRY; +use super::ARROW_EXT_TYPE_INTERVAL; use super::ARROW_EXT_TYPE_VARIANT; use 
super::EXTENSION_KEY; use crate::infer_table_schema; @@ -187,6 +188,13 @@ impl From<&TableField> for Field { ); ArrowDataType::LargeBinary } + TableDataType::Interval => { + metadata.insert( + EXTENSION_KEY.to_string(), + ARROW_EXT_TYPE_INTERVAL.to_string(), + ); + ArrowDataType::Decimal128(38, 0) + } }; Field::new(f.name(), ty, f.is_nullable()).with_metadata(metadata) @@ -293,6 +301,7 @@ impl From<&Column> for ArrayData { Column::String(col) => col.clone().into(), Column::Timestamp(col) => buffer_to_array_data((col.clone(), arrow_type)), Column::Date(col) => buffer_to_array_data((col.clone(), arrow_type)), + Column::Interval(col) => buffer_to_array_data((col.clone(), arrow_type)), Column::Array(col) => { let child_data = ArrayData::from(&col.values); let builder = ArrayDataBuilder::new(arrow_type) diff --git a/src/query/expression/src/converts/datavalues/to.rs b/src/query/expression/src/converts/datavalues/to.rs index 266b65e4e6c8..bee131e2d81a 100644 --- a/src/query/expression/src/converts/datavalues/to.rs +++ b/src/query/expression/src/converts/datavalues/to.rs @@ -38,6 +38,7 @@ pub fn scalar_to_datavalue(scalar: &Scalar) -> DataValue { Scalar::Decimal(_) => unimplemented!("decimal type is not supported"), Scalar::Timestamp(x) => DataValue::Int64(*x), Scalar::Date(x) => DataValue::Int64(*x as i64), + Scalar::Interval(_) => unimplemented!("Interval type is not supported"), Scalar::Boolean(x) => DataValue::Boolean(*x), Scalar::Variant(x) => DataValue::String(x.clone()), Scalar::Geometry(x) => DataValue::String(x.clone()), diff --git a/src/query/expression/src/converts/meta/bincode.rs b/src/query/expression/src/converts/meta/bincode.rs index b1cb2d656dfc..3aed151bd5d6 100644 --- a/src/query/expression/src/converts/meta/bincode.rs +++ b/src/query/expression/src/converts/meta/bincode.rs @@ -17,6 +17,7 @@ use databend_common_column::bitmap::Bitmap; use databend_common_column::buffer::Buffer; +use databend_common_column::types::months_days_micros; use enum_as_inner::EnumAsInner; use serde::Deserialize; use serde::Deserializer; @@ -43,6 +44,7 @@ pub enum LegacyScalar { Decimal(DecimalScalar), Timestamp(i64), Date(i32), + Interval(months_days_micros), Boolean(bool), String(Vec), Array(LegacyColumn), @@ -63,6 +65,7 @@ pub enum LegacyColumn { String(LegacyBinaryColumn), Timestamp(Buffer), Date(Buffer), + Interval(Buffer), Array(Box), Map(Box), Bitmap(LegacyBinaryColumn), @@ -99,6 +102,7 @@ impl From for Scalar { LegacyScalar::Decimal(dec_scalar) => Scalar::Decimal(dec_scalar), LegacyScalar::Timestamp(ts) => Scalar::Timestamp(ts), LegacyScalar::Date(date) => Scalar::Date(date), + LegacyScalar::Interval(interval) => Scalar::Interval(interval), LegacyScalar::Boolean(b) => Scalar::Boolean(b), LegacyScalar::String(s) => Scalar::String(String::from_utf8_lossy(&s).into_owned()), LegacyScalar::Array(col) => Scalar::Array(col.into()), @@ -139,6 +143,7 @@ impl From for Column { } LegacyColumn::Timestamp(buf) => Column::Timestamp(buf), LegacyColumn::Date(buf) => Column::Date(buf), + LegacyColumn::Interval(buf) => Column::Interval(buf), LegacyColumn::Array(arr_col) => Column::Array(Box::new(ArrayColumn:: { values: arr_col.values.into(), offsets: arr_col.offsets, @@ -172,6 +177,7 @@ impl From for LegacyScalar { Scalar::Decimal(dec_scalar) => LegacyScalar::Decimal(dec_scalar), Scalar::Timestamp(ts) => LegacyScalar::Timestamp(ts), Scalar::Date(date) => LegacyScalar::Date(date), + Scalar::Interval(interval) => LegacyScalar::Interval(interval), Scalar::Boolean(b) => LegacyScalar::Boolean(b), Scalar::Binary(_) | 
Scalar::Geometry(_) | Scalar::Geography(_) => unreachable!(), Scalar::String(string) => LegacyScalar::String(string.as_bytes().to_vec()), @@ -199,6 +205,7 @@ impl From for LegacyColumn { } Column::Timestamp(buf) => LegacyColumn::Timestamp(buf), Column::Date(buf) => LegacyColumn::Date(buf), + Column::Interval(buf) => LegacyColumn::Interval(buf), Column::Array(arr_col) => LegacyColumn::Array(Box::new(LegacyArrayColumn { values: arr_col.values.into(), offsets: arr_col.offsets, diff --git a/src/query/expression/src/converts/meta/index_scalar.rs b/src/query/expression/src/converts/meta/index_scalar.rs index e8f58c3fed3e..5231a12a5bff 100644 --- a/src/query/expression/src/converts/meta/index_scalar.rs +++ b/src/query/expression/src/converts/meta/index_scalar.rs @@ -15,6 +15,7 @@ // DO NOT EDIT. // This crate keeps some Index codes for compatibility, it's locked by bincode of meta's v3 version +use databend_common_column::types::months_days_micros; use databend_common_exception::ErrorCode; use databend_common_exception::Result; use enum_as_inner::EnumAsInner; @@ -32,6 +33,7 @@ pub enum IndexScalar { Decimal(DecimalScalar), Timestamp(i64), Date(i32), + Interval(months_days_micros), Boolean(bool), // For compat reason, we keep this attribute which treat string/binary into string #[serde(alias = "String", alias = "Binary")] @@ -51,6 +53,7 @@ impl TryFrom for Scalar { IndexScalar::Decimal(dec_scalar) => Scalar::Decimal(dec_scalar), IndexScalar::Timestamp(ts) => Scalar::Timestamp(ts), IndexScalar::Date(date) => Scalar::Date(date), + IndexScalar::Interval(interval) => Scalar::Interval(interval), IndexScalar::Boolean(b) => Scalar::Boolean(b), IndexScalar::String(s) => Scalar::String(String::from_utf8(s).map_err(|e| { ErrorCode::InvalidUtf8String(format!("invalid utf8 data for string type: {}", e)) @@ -77,6 +80,7 @@ impl TryFrom for IndexScalar { Scalar::Decimal(dec_scalar) => IndexScalar::Decimal(dec_scalar), Scalar::Timestamp(ts) => IndexScalar::Timestamp(ts), Scalar::Date(date) => IndexScalar::Date(date), + Scalar::Interval(interval) => IndexScalar::Interval(interval), Scalar::Boolean(b) => IndexScalar::Boolean(b), Scalar::String(string) => IndexScalar::String(string.as_bytes().to_vec()), Scalar::Binary(s) => IndexScalar::BinaryV2(s), diff --git a/src/query/expression/src/kernels/concat.rs b/src/query/expression/src/kernels/concat.rs index 6a51c0b56f4b..ecdd087cea8e 100644 --- a/src/query/expression/src/kernels/concat.rs +++ b/src/query/expression/src/kernels/concat.rs @@ -35,6 +35,7 @@ use crate::types::BooleanType; use crate::types::DataType; use crate::types::DateType; use crate::types::DecimalType; +use crate::types::IntervalType; use crate::types::MapType; use crate::types::NumberType; use crate::types::TimestampType; @@ -164,6 +165,13 @@ impl Column { ); Column::Date(buffer) } + Column::Interval(_) => { + let buffer = Self::concat_primitive_types( + columns.map(|col| IntervalType::try_downcast_column(&col).unwrap()), + capacity, + ); + Column::Interval(buffer) + } Column::Array(col) => { let mut offsets = Vec::with_capacity(capacity + 1); offsets.push(0); diff --git a/src/query/expression/src/kernels/group_by_hash/utils.rs b/src/query/expression/src/kernels/group_by_hash/utils.rs index d682067e373d..0d786e55aa71 100644 --- a/src/query/expression/src/kernels/group_by_hash/utils.rs +++ b/src/query/expression/src/kernels/group_by_hash/utils.rs @@ -89,6 +89,7 @@ pub unsafe fn serialize_column_binary(column: &Column, row: usize, row_space: &m } Column::Timestamp(v) => 
row_space.store_value_uncheckd(&v[row]), Column::Date(v) => row_space.store_value_uncheckd(&v[row]), + Column::Interval(v) => row_space.store_value_uncheckd(&v[row]), Column::Array(array) | Column::Map(array) => { let data = array.index(row).unwrap(); row_space.store_value_uncheckd(&(data.len() as u64)); diff --git a/src/query/expression/src/kernels/take_chunks.rs b/src/query/expression/src/kernels/take_chunks.rs index 6573e1d0fd09..ffb9a4a6a5e4 100644 --- a/src/query/expression/src/kernels/take_chunks.rs +++ b/src/query/expression/src/kernels/take_chunks.rs @@ -299,6 +299,10 @@ impl Column { let builder = DateType::create_builder(result_size, &[]); Self::take_block_value_types::(columns, builder, indices) } + Column::Interval(_) => { + let builder = IntervalType::create_builder(result_size, &[]); + Self::take_block_value_types::(columns, builder, indices) + } Column::Array(column) => { let mut offsets = Vec::with_capacity(result_size + 1); offsets.push(0); @@ -520,6 +524,13 @@ impl Column { .collect_vec(); ColumnVec::Timestamp(columns) } + Column::Interval(_) => { + let columns = columns + .iter() + .map(|col| IntervalType::try_downcast_column(col).unwrap()) + .collect_vec(); + ColumnVec::Interval(columns) + } Column::Date(_) => { let columns = columns .iter() @@ -683,6 +694,14 @@ impl Column { .unwrap(); Column::Date(d) } + ColumnVec::Interval(columns) => { + let builder = Self::take_block_vec_primitive_types(columns, indices); + let i = + ::upcast_column(::column_from_vec(builder, &[])) + .into_interval() + .unwrap(); + Column::Interval(i) + } ColumnVec::Array(columns) => { let data_type = data_type.as_array().unwrap(); let mut offsets = Vec::with_capacity(result_size + 1); diff --git a/src/query/expression/src/property.rs b/src/query/expression/src/property.rs index 5c7f27abb49c..47137fb188d3 100644 --- a/src/query/expression/src/property.rs +++ b/src/query/expression/src/property.rs @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +use databend_common_column::types::months_days_micros; use enum_as_inner::EnumAsInner; use crate::types::boolean::BooleanDomain; @@ -32,6 +33,7 @@ use crate::types::BooleanType; use crate::types::DataType; use crate::types::DateType; use crate::types::DecimalDataType; +use crate::types::IntervalType; use crate::types::NumberDataType; use crate::types::NumberType; use crate::types::StringType; @@ -99,6 +101,7 @@ pub enum Domain { String(StringDomain), Timestamp(SimpleDomain), Date(SimpleDomain), + Interval(SimpleDomain), Nullable(NullableDomain), /// `Array(None)` means that the array is empty, thus there is no inner domain information. Array(Option>), @@ -177,6 +180,7 @@ impl Domain { }, DataType::Timestamp => Domain::Timestamp(TimestampType::full_domain()), DataType::Date => Domain::Date(DateType::full_domain()), + DataType::Interval => Domain::Interval(IntervalType::full_domain()), DataType::Null => Domain::Nullable(NullableDomain { has_null: true, value: None, diff --git a/src/query/expression/src/row/fixed.rs b/src/query/expression/src/row/fixed.rs index 74c7e27f598e..9cb742db6b7d 100644 --- a/src/query/expression/src/row/fixed.rs +++ b/src/query/expression/src/row/fixed.rs @@ -13,6 +13,8 @@ // limitations under the License. 
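
Before the row-format change below: each interval row key is written as the 16 bytes of its underlying i128. A rough, standalone sketch (not the helper added in this patch) of why fixed-width big-endian bytes suit memcmp-style row comparison, shown with the usual sign-bit flip for signed values:

// Generic illustration only: big-endian bytes compare the same way as the
// numbers they encode once the sign bit is flipped, so byte-wise row
// comparison stays consistent with numeric ordering.
fn order_preserving_be(v: i128) -> [u8; 16] {
    // Flipping the sign bit maps i128 ordering onto u128 ordering.
    ((v as u128) ^ (1u128 << 127)).to_be_bytes()
}

fn main() {
    let samples = [-10_i128, -1, 0, 1, 10];
    for pair in samples.windows(2) {
        // Lexicographic byte order matches numeric order.
        assert!(order_preserving_be(pair[0]) < order_preserving_be(pair[1]));
    }
}
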
use databend_common_column::bitmap::Bitmap; +use databend_common_column::types::months_days_micros; +use databend_common_column::types::NativeType; use ethnum::i256; use super::row_converter::null_sentinel; @@ -98,6 +100,14 @@ impl FixedLengthEncoding for F64 { } } +impl FixedLengthEncoding for months_days_micros { + type Encoded = [u8; 16]; + + fn encode(self) -> [u8; 16] { + self.to_be_bytes() + } +} + pub fn encode( out: &mut BinaryColumnBuilder, iter: I, diff --git a/src/query/expression/src/row/row_converter.rs b/src/query/expression/src/row/row_converter.rs index 58219ebb05b7..1eb1e1c16b91 100644 --- a/src/query/expression/src/row/row_converter.rs +++ b/src/query/expression/src/row/row_converter.rs @@ -14,6 +14,7 @@ use std::sync::Arc; +use databend_common_column::types::months_days_micros; use databend_common_exception::ErrorCode; use databend_common_exception::Result; use ethnum::i256; @@ -62,6 +63,7 @@ impl RowConverter { | DataType::Number(_) | DataType::Decimal(_) | DataType::Timestamp + | DataType::Interval | DataType::Date | DataType::Binary | DataType::String @@ -117,6 +119,9 @@ impl RowConverter { DataType::Timestamp => lengths .iter_mut() .for_each(|x| *x += i64::ENCODED_LEN as u64), + DataType::Interval => lengths + .iter_mut() + .for_each(|x| *x += months_days_micros::ENCODED_LEN as u64), DataType::Date => lengths .iter_mut() .for_each(|x| *x += i32::ENCODED_LEN as u64), @@ -254,6 +259,7 @@ fn encode_column(out: &mut BinaryColumnBuilder, column: &Column, asc: bool, null }) } Column::Timestamp(col) => fixed::encode(out, col, validity, asc, nulls_first), + Column::Interval(col) => fixed::encode(out, col, validity, asc, nulls_first), Column::Date(col) => fixed::encode(out, col, validity, asc, nulls_first), Column::Binary(col) => variable::encode(out, col.iter(), validity, asc, nulls_first), Column::String(col) => variable::encode( diff --git a/src/query/expression/src/schema.rs b/src/query/expression/src/schema.rs index 92ca3ecefb08..9801a0650056 100644 --- a/src/query/expression/src/schema.rs +++ b/src/query/expression/src/schema.rs @@ -207,6 +207,7 @@ pub enum TableDataType { Variant, Geometry, Geography, + Interval, } impl DataSchema { @@ -1178,6 +1179,7 @@ impl From<&TableDataType> for DataType { TableDataType::Boolean => DataType::Boolean, TableDataType::Binary => DataType::Binary, TableDataType::String => DataType::String, + TableDataType::Interval => DataType::Interval, TableDataType::Number(ty) => DataType::Number(*ty), TableDataType::Decimal(ty) => DataType::Decimal(*ty), TableDataType::Timestamp => DataType::Timestamp, @@ -1430,6 +1432,7 @@ pub fn infer_schema_type(data_type: &DataType) -> Result { DataType::Timestamp => Ok(TableDataType::Timestamp), DataType::Decimal(x) => Ok(TableDataType::Decimal(*x)), DataType::Date => Ok(TableDataType::Date), + DataType::Interval => Ok(TableDataType::Interval), DataType::Nullable(inner_type) => Ok(TableDataType::Nullable(Box::new(infer_schema_type( inner_type, )?))), diff --git a/src/query/expression/src/type_check.rs b/src/query/expression/src/type_check.rs index 73085a23ca50..e5ddf6ad2689 100755 --- a/src/query/expression/src/type_check.rs +++ b/src/query/expression/src/type_check.rs @@ -725,6 +725,7 @@ pub const ALL_SIMPLE_CAST_FUNCTIONS: &[&str] = &[ "to_float32", "to_float64", "to_timestamp", + "to_interval", "to_date", "to_variant", "to_boolean", diff --git a/src/query/expression/src/types.rs b/src/query/expression/src/types.rs index c24435832757..95ed56924541 100755 --- a/src/query/expression/src/types.rs +++ 
b/src/query/expression/src/types.rs @@ -24,6 +24,7 @@ pub mod empty_map; pub mod generic; pub mod geography; pub mod geometry; +pub mod interval; pub mod map; pub mod null; pub mod nullable; @@ -60,6 +61,7 @@ pub use self::generic::GenericType; pub use self::geography::GeographyColumn; pub use self::geography::GeographyType; pub use self::geometry::GeometryType; +pub use self::interval::IntervalType; pub use self::map::MapType; pub use self::null::NullType; pub use self::nullable::NullableColumn; @@ -97,6 +99,7 @@ pub enum DataType { Tuple(Vec), Variant, Geometry, + Interval, Geography, // Used internally for generic types @@ -149,6 +152,7 @@ impl DataType { | DataType::Decimal(_) | DataType::Timestamp | DataType::Date + | DataType::Interval | DataType::Bitmap | DataType::Variant | DataType::Geometry @@ -173,6 +177,7 @@ impl DataType { | DataType::Decimal(_) | DataType::Timestamp | DataType::Date + | DataType::Interval | DataType::Bitmap | DataType::Variant | DataType::Geometry diff --git a/src/query/expression/src/types/interval.rs b/src/query/expression/src/types/interval.rs new file mode 100644 index 000000000000..1ddc3a52458c --- /dev/null +++ b/src/query/expression/src/types/interval.rs @@ -0,0 +1,248 @@ +// Copyright 2021 Datafuse Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
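
The new IntervalType defined in this file wraps the months/days/microseconds triple parsed by databend_common_io::Interval and packed into months_days_micros. A hedged sketch of the intended round trip, using only the calls that appear in this patch (the concrete asserted values assume the parser normalizes one year to 12 months and one hour to 3_600_000_000 microseconds, as intended):

use databend_common_column::types::months_days_micros;
use databend_common_io::Interval;

fn round_trip() -> databend_common_exception::Result<()> {
    // Parse a textual interval, pack it into the columnar representation,
    // then read the components back through the accessors used below.
    let parsed = Interval::from_string("1 year 1 day 1 hour")?;
    let packed = months_days_micros::new(parsed.months, parsed.days, parsed.micros);
    assert_eq!(packed.months(), 12);
    assert_eq!(packed.days(), 1);
    assert_eq!(packed.microseconds(), 3_600_000_000);
    Ok(())
}
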
+
+use std::cmp::Ordering;
+use std::fmt::Display;
+use std::ops::Range;
+
+use databend_common_column::buffer::Buffer;
+use databend_common_column::types::months_days_micros;
+use databend_common_io::Interval;
+
+use super::number::SimpleDomain;
+use crate::property::Domain;
+use crate::types::ArgType;
+use crate::types::DataType;
+use crate::types::DecimalSize;
+use crate::types::GenericMap;
+use crate::types::ValueType;
+use crate::utils::arrow::buffer_into_mut;
+use crate::values::Column;
+use crate::values::Scalar;
+use crate::ColumnBuilder;
+use crate::ScalarRef;
+
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub struct IntervalType;
+
+impl ValueType for IntervalType {
+    type Scalar = months_days_micros;
+    type ScalarRef<'a> = months_days_micros;
+    type Column = Buffer<months_days_micros>;
+    type Domain = SimpleDomain<months_days_micros>;
+    type ColumnIterator<'a> = std::iter::Cloned<std::slice::Iter<'a, months_days_micros>>;
+    type ColumnBuilder = Vec<months_days_micros>;
+
+    #[inline]
+    fn upcast_gat<'short, 'long: 'short>(long: months_days_micros) -> months_days_micros {
+        long
+    }
+
+    fn to_owned_scalar(scalar: Self::ScalarRef<'_>) -> Self::Scalar {
+        scalar
+    }
+
+    fn to_scalar_ref(scalar: &Self::Scalar) -> Self::ScalarRef<'_> {
+        *scalar
+    }
+
+    fn try_downcast_scalar<'a>(scalar: &'a ScalarRef) -> Option<Self::ScalarRef<'a>> {
+        match scalar {
+            ScalarRef::Interval(scalar) => Some(*scalar),
+            _ => None,
+        }
+    }
+
+    fn try_downcast_column(col: &Column) -> Option<Self::Column> {
+        match col {
+            Column::Interval(column) => Some(column.clone()),
+            _ => None,
+        }
+    }
+
+    fn try_downcast_domain(domain: &Domain) -> Option<SimpleDomain<months_days_micros>> {
+        domain.as_interval().cloned()
+    }
+
+    fn try_downcast_builder(builder: &mut ColumnBuilder) -> Option<&mut Self::ColumnBuilder> {
+        match builder {
+            ColumnBuilder::Interval(builder) => Some(builder),
+            _ => None,
+        }
+    }
+
+    fn try_downcast_owned_builder(builder: ColumnBuilder) -> Option<Self::ColumnBuilder> {
+        match builder {
+            ColumnBuilder::Interval(builder) => Some(builder),
+            _ => None,
+        }
+    }
+
+    fn try_upcast_column_builder(
+        builder: Self::ColumnBuilder,
+        _decimal_size: Option<DecimalSize>,
+    ) -> Option<ColumnBuilder> {
+        Some(ColumnBuilder::Interval(builder))
+    }
+
+    fn upcast_scalar(scalar: Self::Scalar) -> Scalar {
+        Scalar::Interval(scalar)
+    }
+
+    fn upcast_column(col: Self::Column) -> Column {
+        Column::Interval(col)
+    }
+
+    fn upcast_domain(domain: SimpleDomain<months_days_micros>) -> Domain {
+        Domain::Interval(domain)
+    }
+
+    fn column_len(col: &Self::Column) -> usize {
+        col.len()
+    }
+
+    fn index_column(col: &Self::Column, index: usize) -> Option<Self::ScalarRef<'_>> {
+        col.get(index).cloned()
+    }
+
+    #[inline(always)]
+    unsafe fn index_column_unchecked(col: &Self::Column, index: usize) -> Self::ScalarRef<'_> {
+        debug_assert!(index < col.len());
+
+        *col.get_unchecked(index)
+    }
+
+    fn slice_column(col: &Self::Column, range: Range<usize>) -> Self::Column {
+        col.clone().sliced(range.start, range.end - range.start)
+    }
+
+    fn iter_column(col: &Self::Column) -> Self::ColumnIterator<'_> {
+        col.iter().cloned()
+    }
+
+    fn column_to_builder(col: Self::Column) -> Self::ColumnBuilder {
+        buffer_into_mut(col)
+    }
+
+    fn builder_len(builder: &Self::ColumnBuilder) -> usize {
+        builder.len()
+    }
+
+    fn push_item(builder: &mut Self::ColumnBuilder, item: Self::Scalar) {
+        builder.push(item);
+    }
+
+    fn push_item_repeat(builder: &mut Self::ColumnBuilder, item: Self::ScalarRef<'_>, n: usize) {
+        builder.resize(builder.len() + n, item);
+    }
+
+    fn push_default(builder: &mut Self::ColumnBuilder) {
+        builder.push(Self::Scalar::default());
+    }
+
+    fn append_column(builder: &mut Self::ColumnBuilder, other: &Self::Column) {
+        builder.extend_from_slice(other);
+    }
+
+    fn build_column(builder: Self::ColumnBuilder) -> Self::Column {
+        builder.into()
+    }
+
+    fn build_scalar(builder: Self::ColumnBuilder) -> Self::Scalar {
+        assert_eq!(builder.len(), 1);
+        builder[0]
+    }
+
+    #[inline(always)]
+    fn compare(lhs: Self::ScalarRef<'_>, rhs: Self::ScalarRef<'_>) -> Ordering {
+        lhs.0.cmp(&rhs.0)
+    }
+
+    #[inline(always)]
+    fn equal(left: Self::ScalarRef<'_>, right: Self::ScalarRef<'_>) -> bool {
+        left == right
+    }
+
+    #[inline(always)]
+    fn not_equal(left: Self::ScalarRef<'_>, right: Self::ScalarRef<'_>) -> bool {
+        left != right
+    }
+
+    #[inline(always)]
+    fn greater_than(left: Self::ScalarRef<'_>, right: Self::ScalarRef<'_>) -> bool {
+        left > right
+    }
+
+    #[inline(always)]
+    fn less_than(left: Self::ScalarRef<'_>, right: Self::ScalarRef<'_>) -> bool {
+        left < right
+    }
+
+    #[inline(always)]
+    fn greater_than_equal(left: Self::ScalarRef<'_>, right: Self::ScalarRef<'_>) -> bool {
+        left >= right
+    }
+
+    #[inline(always)]
+    fn less_than_equal(left: Self::ScalarRef<'_>, right: Self::ScalarRef<'_>) -> bool {
+        left <= right
+    }
+}
+
+impl ArgType for IntervalType {
+    fn data_type() -> DataType {
+        DataType::Interval
+    }
+
+    fn full_domain() -> Self::Domain {
+        SimpleDomain {
+            min: months_days_micros::new(-12 * 200, -365 * 200, -7200000000000000000),
+            max: months_days_micros::new(12 * 200, 365 * 200, 7200000000000000000),
+        }
+    }
+
+    fn create_builder(capacity: usize, _generics: &GenericMap) -> Self::ColumnBuilder {
+        Vec::with_capacity(capacity)
+    }
+
+    fn column_from_vec(vec: Vec<Self::Scalar>, _generics: &GenericMap) -> Self::Column {
+        vec.into()
+    }
+
+    fn column_from_iter(iter: impl Iterator<Item = Self::Scalar>, _: &GenericMap) -> Self::Column {
+        iter.collect()
+    }
+
+    fn column_from_ref_iter<'a>(
+        iter: impl Iterator<Item = Self::ScalarRef<'a>>,
+        _: &GenericMap,
+    ) -> Self::Column {
+        iter.collect()
+    }
+}
+
+#[inline]
+pub fn string_to_interval(interval_str: &str) -> databend_common_exception::Result<Interval> {
+    Interval::from_string(interval_str)
+}
+
+#[inline]
+pub fn interval_to_string(i: &months_days_micros) -> impl Display {
+    let interval = Interval {
+        months: i.months(),
+        days: i.days(),
+        micros: i.microseconds(),
+    };
+    interval.to_string()
+}
diff --git a/src/query/expression/src/types/variant.rs b/src/query/expression/src/types/variant.rs
index afe816d0b580..b9bb4fbb1fce 100644
--- a/src/query/expression/src/types/variant.rs
+++ b/src/query/expression/src/types/variant.rs
@@ -29,6 +29,7 @@ use super::date::date_to_string;
 use super::number::NumberScalar;
 use super::timestamp::timestamp_to_string;
 use crate::property::Domain;
+use crate::types::interval::interval_to_string;
 use crate::types::map::KvPair;
 use crate::types::AnyType;
 use crate::types::ArgType;
@@ -231,6 +232,7 @@ pub fn cast_scalar_to_variant(scalar: ScalarRef, tz: &TimeZone, buf: &mut Vec<u8>) {
         ScalarRef::String(s) => jsonb::Value::String(s.into()),
         ScalarRef::Timestamp(ts) => timestamp_to_string(ts, tz).to_string().into(),
         ScalarRef::Date(d) => date_to_string(d, tz).to_string().into(),
+        ScalarRef::Interval(i) => interval_to_string(&i).to_string().into(),
         ScalarRef::Array(col) => {
             let items = cast_scalars_to_variants(col.iter(), tz);
             jsonb::build_array(items.iter(), buf).expect("failed to build jsonb array");
diff --git a/src/query/expression/src/utils/display.rs b/src/query/expression/src/utils/display.rs
index a714ded0738f..e4a49c3bb770 100755
--- a/src/query/expression/src/utils/display.rs
+++ b/src/query/expression/src/utils/display.rs
@@ -45,6 +45,7 @@ use crate::types::decimal::DecimalColumn;
 use crate::types::decimal::DecimalDataType;
 use
crate::types::decimal::DecimalDomain; use crate::types::decimal::DecimalScalar; +use crate::types::interval::interval_to_string; use crate::types::map::KvPair; use crate::types::nullable::NullableDomain; use crate::types::number::NumberColumn; @@ -126,6 +127,10 @@ impl Debug for ScalarRef<'_> { ScalarRef::String(s) => write!(f, "{s:?}"), ScalarRef::Timestamp(t) => write!(f, "{t:?}"), ScalarRef::Date(d) => write!(f, "{d:?}"), + ScalarRef::Interval(i) => { + let interval = interval_to_string(i); + write!(f, "{interval}") + } ScalarRef::Array(col) => write!(f, "[{}]", col.iter().join(", ")), ScalarRef::Map(col) => { write!(f, "{{")?; @@ -193,6 +198,7 @@ impl Debug for Column { Column::String(col) => write!(f, "{col:?}"), Column::Timestamp(col) => write!(f, "{col:?}"), Column::Date(col) => write!(f, "{col:?}"), + Column::Interval(col) => write!(f, "{col:?}"), Column::Array(col) => write!(f, "{col:?}"), Column::Map(col) => write!(f, "{col:?}"), Column::Bitmap(col) => write!(f, "{col:?}"), @@ -223,6 +229,7 @@ impl Display for ScalarRef<'_> { ScalarRef::String(s) => write!(f, "'{s}'"), ScalarRef::Timestamp(t) => write!(f, "'{}'", timestamp_to_string(*t, &TimeZone::UTC)), ScalarRef::Date(d) => write!(f, "'{}'", date_to_string(*d as i64, &TimeZone::UTC)), + ScalarRef::Interval(interval) => write!(f, "{}", interval_to_string(interval)), ScalarRef::Array(col) => write!(f, "[{}]", col.iter().join(", ")), ScalarRef::Map(col) => { write!(f, "{{")?; @@ -484,6 +491,7 @@ impl Display for DataType { DataType::Decimal(decimal) => write!(f, "{decimal}"), DataType::Timestamp => write!(f, "Timestamp"), DataType::Date => write!(f, "Date"), + DataType::Interval => write!(f, "Interval"), DataType::Null => write!(f, "NULL"), DataType::Nullable(inner) => write!(f, "{inner} NULL"), DataType::EmptyArray => write!(f, "Array(Nothing)"), @@ -559,6 +567,7 @@ impl Display for TableDataType { write!(f, ")") } TableDataType::Variant => write!(f, "Variant"), + TableDataType::Interval => write!(f, "Interval"), TableDataType::Geometry => write!(f, "Geometry"), TableDataType::Geography => write!(f, "Geography"), } @@ -1016,6 +1025,7 @@ impl Display for Domain { Domain::String(domain) => write!(f, "{domain}"), Domain::Timestamp(domain) => write!(f, "{domain}"), Domain::Date(domain) => write!(f, "{domain}"), + Domain::Interval(domain) => write!(f, "{:?}", domain), Domain::Nullable(domain) => write!(f, "{domain}"), Domain::Array(None) => write!(f, "[]"), Domain::Array(Some(domain)) => write!(f, "[{domain}]"), diff --git a/src/query/expression/src/utils/variant_transform.rs b/src/query/expression/src/utils/variant_transform.rs index 407d38e7db4a..46787afd2907 100644 --- a/src/query/expression/src/utils/variant_transform.rs +++ b/src/query/expression/src/utils/variant_transform.rs @@ -33,6 +33,7 @@ pub fn contains_variant(data_type: &DataType) -> bool { | DataType::EmptyMap | DataType::Boolean | DataType::Binary + | DataType::Interval | DataType::String | DataType::Number(_) | DataType::Decimal(_) @@ -77,6 +78,7 @@ fn transform_scalar(scalar: ScalarRef<'_>, decode: bool) -> Result { | ScalarRef::Decimal(_) | ScalarRef::Timestamp(_) | ScalarRef::Date(_) + | ScalarRef::Interval(_) | ScalarRef::Boolean(_) | ScalarRef::Binary(_) | ScalarRef::String(_) diff --git a/src/query/expression/src/utils/visitor.rs b/src/query/expression/src/utils/visitor.rs index b3b9e06b211d..e45c4aac4d5e 100755 --- a/src/query/expression/src/utils/visitor.rs +++ b/src/query/expression/src/utils/visitor.rs @@ -14,6 +14,7 @@ use 
databend_common_column::bitmap::Bitmap; use databend_common_column::buffer::Buffer; +use databend_common_column::types::months_days_micros; use databend_common_exception::Result; use decimal::DecimalType; use geometry::GeometryType; @@ -79,6 +80,10 @@ pub trait ValueVisitor { self.visit_typed_column::(buffer) } + fn visit_interval(&mut self, buffer: Buffer) -> Result<()> { + self.visit_typed_column::(buffer) + } + fn visit_array(&mut self, column: Box>) -> Result<()> { self.visit_typed_column::(Column::Array(column)) } @@ -132,6 +137,7 @@ pub trait ValueVisitor { Column::String(column) => self.visit_string(column), Column::Timestamp(buffer) => self.visit_timestamp(buffer), Column::Date(buffer) => self.visit_date(buffer), + Column::Interval(buffer) => self.visit_interval(buffer), Column::Array(column) => self.visit_array(column), Column::Map(column) => self.visit_map(column), Column::Tuple(columns) => self.visit_tuple(columns), diff --git a/src/query/expression/src/values.rs b/src/query/expression/src/values.rs index e6a518cc7289..4a30ae2fbbd8 100755 --- a/src/query/expression/src/values.rs +++ b/src/query/expression/src/values.rs @@ -28,6 +28,7 @@ use databend_common_base::base::OrderedFloat; use databend_common_column::bitmap::Bitmap; use databend_common_column::bitmap::MutableBitmap; use databend_common_column::buffer::Buffer; +use databend_common_column::types::months_days_micros; use databend_common_exception::ErrorCode; use databend_common_exception::Result; use databend_common_io::prelude::BinaryRead; @@ -113,6 +114,7 @@ pub enum Scalar { Decimal(DecimalScalar), Timestamp(i64), Date(i32), + Interval(months_days_micros), Boolean(bool), Binary(Vec), String(String), @@ -138,6 +140,7 @@ pub enum ScalarRef<'a> { String(&'a str), Timestamp(i64), Date(i32), + Interval(months_days_micros), Array(Column), Map(Column), Bitmap(&'a [u8]), @@ -159,6 +162,7 @@ pub enum Column { String(StringColumn), Timestamp(Buffer), Date(Buffer), + Interval(Buffer), Array(Box>), Map(Box>), Bitmap(BinaryColumn), @@ -181,6 +185,7 @@ pub enum ColumnVec { String(Vec), Timestamp(Vec>), Date(Vec>), + Interval(Vec>), Array(Vec>), Map(Vec>>), Bitmap(Vec), @@ -203,6 +208,7 @@ pub enum ColumnBuilder { String(StringColumnBuilder), Timestamp(Vec), Date(Vec), + Interval(Vec), Array(Box>), Map(Box>), Bitmap(BinaryColumnBuilder), @@ -323,6 +329,7 @@ impl Scalar { Scalar::String(s) => ScalarRef::String(s.as_str()), Scalar::Timestamp(t) => ScalarRef::Timestamp(*t), Scalar::Date(d) => ScalarRef::Date(*d), + Scalar::Interval(d) => ScalarRef::Interval(*d), Scalar::Array(col) => ScalarRef::Array(col.clone()), Scalar::Map(col) => ScalarRef::Map(col.clone()), Scalar::Bitmap(b) => ScalarRef::Bitmap(b.as_slice()), @@ -356,6 +363,7 @@ impl Scalar { DataType::Decimal(ty) => Scalar::Decimal(ty.default_scalar()), DataType::Timestamp => Scalar::Timestamp(0), DataType::Date => Scalar::Date(0), + DataType::Interval => Scalar::Interval(months_days_micros(0)), DataType::Nullable(_) => Scalar::Null, DataType::Array(ty) => { let builder = ColumnBuilder::with_capacity(ty, 0); @@ -386,6 +394,7 @@ impl Scalar { | Scalar::Decimal(_) | Scalar::Timestamp(_) | Scalar::Date(_) + | Scalar::Interval(_) | Scalar::Boolean(_) | Scalar::Binary(_) | Scalar::String(_) @@ -403,6 +412,7 @@ impl Scalar { Scalar::Decimal(d) => d.is_positive(), Scalar::Timestamp(t) => *t > 0, Scalar::Date(d) => *d > 0, + Scalar::Interval(i) => i.0.is_positive(), _ => unreachable!("is_positive() called on non-numeric scalar"), } } @@ -438,6 +448,7 @@ impl ScalarRef<'_> { 
ScalarRef::String(s) => Scalar::String(s.to_string()), ScalarRef::Timestamp(t) => Scalar::Timestamp(*t), ScalarRef::Date(d) => Scalar::Date(*d), + ScalarRef::Interval(i) => Scalar::Interval(*i), ScalarRef::Array(col) => Scalar::Array(col.clone()), ScalarRef::Map(col) => Scalar::Map(col.clone()), ScalarRef::Bitmap(b) => Scalar::Bitmap(b.to_vec()), @@ -484,6 +495,7 @@ impl ScalarRef<'_> { }), ScalarRef::Timestamp(t) => Domain::Timestamp(SimpleDomain { min: *t, max: *t }), ScalarRef::Date(d) => Domain::Date(SimpleDomain { min: *d, max: *d }), + ScalarRef::Interval(i) => Domain::Interval(SimpleDomain { min: *i, max: *i }), ScalarRef::Array(array) => { if array.len() == 0 { Domain::Array(None) @@ -536,6 +548,7 @@ impl ScalarRef<'_> { ScalarRef::String(s) => s.len(), ScalarRef::Timestamp(_) => 8, ScalarRef::Date(_) => 4, + ScalarRef::Interval(_) => 16, ScalarRef::Array(col) => col.memory_size(), ScalarRef::Map(col) => col.memory_size(), ScalarRef::Bitmap(b) => b.len(), @@ -564,6 +577,7 @@ impl ScalarRef<'_> { ScalarRef::String(_) => DataType::String, ScalarRef::Timestamp(_) => DataType::Timestamp, ScalarRef::Date(_) => DataType::Date, + ScalarRef::Interval(_) => DataType::Interval, ScalarRef::Array(array) => DataType::Array(Box::new(array.data_type())), ScalarRef::Map(col) => DataType::Map(Box::new(col.data_type())), ScalarRef::Bitmap(_) => DataType::Bitmap, @@ -643,6 +657,7 @@ impl ScalarRef<'_> { (ScalarRef::Variant(_), ScalarRef::Variant(_)) => Some(DataType::Variant), (ScalarRef::Geometry(_), ScalarRef::Geometry(_)) => Some(DataType::Geometry), (ScalarRef::Geography(_), ScalarRef::Geography(_)) => Some(DataType::Geography), + (ScalarRef::Interval(_), ScalarRef::Interval(_)) => Some(DataType::Interval), _ => None, } } @@ -661,6 +676,7 @@ impl ScalarRef<'_> { (ScalarRef::Binary(_), DataType::Binary) => true, (ScalarRef::String(_), DataType::String) => true, (ScalarRef::Timestamp(_), DataType::Timestamp) => true, + (ScalarRef::Interval(_), DataType::Interval) => true, (ScalarRef::Date(_), DataType::Date) => true, (ScalarRef::Bitmap(_), DataType::Bitmap) => true, (ScalarRef::Variant(_), DataType::Variant) => true, @@ -695,6 +711,7 @@ impl PartialOrd for Scalar { (Scalar::String(s1), Scalar::String(s2)) => s1.partial_cmp(s2), (Scalar::Timestamp(t1), Scalar::Timestamp(t2)) => t1.partial_cmp(t2), (Scalar::Date(d1), Scalar::Date(d2)) => d1.partial_cmp(d2), + (Scalar::Interval(i1), Scalar::Interval(i2)) => i1.partial_cmp(i2), (Scalar::Array(a1), Scalar::Array(a2)) => a1.partial_cmp(a2), (Scalar::Map(m1), Scalar::Map(m2)) => m1.partial_cmp(m2), (Scalar::Bitmap(b1), Scalar::Bitmap(b2)) => b1.partial_cmp(b2), @@ -741,6 +758,7 @@ impl<'b> PartialOrd> for ScalarRef<'_> { (ScalarRef::Variant(v1), ScalarRef::Variant(v2)) => jsonb::compare(v1, v2).ok(), (ScalarRef::Geometry(g1), ScalarRef::Geometry(g2)) => compare_geometry(g1, g2), (ScalarRef::Geography(g1), ScalarRef::Geography(g2)) => g1.partial_cmp(g2), + (ScalarRef::Interval(i1), ScalarRef::Interval(i2)) => i1.partial_cmp(i2), // By default, null is biggest in pgsql (ScalarRef::Null, _) => Some(Ordering::Greater), @@ -781,6 +799,7 @@ impl Hash for ScalarRef<'_> { ScalarRef::String(v) => v.hash(state), ScalarRef::Timestamp(v) => v.hash(state), ScalarRef::Date(v) => v.hash(state), + ScalarRef::Interval(v) => v.0.hash(state), ScalarRef::Array(v) => { let str = serialize_column(v); str.hash(state); @@ -825,6 +844,9 @@ impl PartialOrd for Column { col1.iter().partial_cmp(col2.iter()) } (Column::Date(col1), Column::Date(col2)) => 
col1.iter().partial_cmp(col2.iter()), + (Column::Interval(col1), Column::Interval(col2)) => { + col1.iter().partial_cmp(col2.iter()) + } (Column::Array(col1), Column::Array(col2)) => col1.iter().partial_cmp(col2.iter()), (Column::Map(col1), Column::Map(col2)) => col1.iter().partial_cmp(col2.iter()), (Column::Bitmap(col1), Column::Bitmap(col2)) => col1.iter().partial_cmp(col2.iter()), @@ -883,6 +905,7 @@ impl Column { Column::String(col) => col.len(), Column::Timestamp(col) => col.len(), Column::Date(col) => col.len(), + Column::Interval(col) => col.len(), Column::Array(col) => col.len(), Column::Map(col) => col.len(), Column::Bitmap(col) => col.len(), @@ -906,6 +929,7 @@ impl Column { Column::String(col) => Some(ScalarRef::String(col.value(index))), Column::Timestamp(col) => Some(ScalarRef::Timestamp(col.get(index).cloned()?)), Column::Date(col) => Some(ScalarRef::Date(col.get(index).cloned()?)), + Column::Interval(col) => Some(ScalarRef::Interval(col.get(index).cloned()?)), Column::Array(col) => Some(ScalarRef::Array(col.index(index)?)), Column::Map(col) => Some(ScalarRef::Map(col.index(index)?)), Column::Bitmap(col) => Some(ScalarRef::Bitmap(col.index(index)?)), @@ -937,6 +961,7 @@ impl Column { Column::String(col) => ScalarRef::String(col.index_unchecked(index)), Column::Timestamp(col) => ScalarRef::Timestamp(*col.get_unchecked(index)), Column::Date(col) => ScalarRef::Date(*col.get_unchecked(index)), + Column::Interval(col) => ScalarRef::Interval(*col.get_unchecked(index)), Column::Array(col) => ScalarRef::Array(col.index_unchecked(index)), Column::Map(col) => ScalarRef::Map(col.index_unchecked(index)), Column::Bitmap(col) => ScalarRef::Bitmap(col.index_unchecked(index)), @@ -991,6 +1016,9 @@ impl Column { Column::Date(col) => { Column::Date(col.clone().sliced(range.start, range.end - range.start)) } + Column::Interval(col) => { + Column::Interval(col.clone().sliced(range.start, range.end - range.start)) + } Column::Array(col) => Column::Array(Box::new(col.slice(range))), Column::Map(col) => Column::Map(Box::new(col.slice(range))), Column::Bitmap(col) => Column::Bitmap(col.slice(range)), @@ -1053,6 +1081,13 @@ impl Column { max: *max, }) } + Column::Interval(col) => { + let (min, max) = col.iter().minmax().into_option().unwrap(); + Domain::Interval(SimpleDomain { + min: *min, + max: *max, + }) + } Column::Array(col) => { if col.len() == 0 || col.values.len() == 0 { Domain::Array(None) @@ -1105,6 +1140,7 @@ impl Column { Column::String(_) => DataType::String, Column::Timestamp(_) => DataType::Timestamp, Column::Date(_) => DataType::Date, + Column::Interval(_) => DataType::Interval, Column::Array(array) => { let inner = array.values.data_type(); DataType::Array(Box::new(inner)) @@ -1239,6 +1275,7 @@ impl Column { .map(|_| rng.gen_range(DATE_MIN..=DATE_MAX)) .collect::>(), ), + DataType::Interval => unimplemented!(), DataType::Nullable(ty) => NullableColumn::new_column( Column::random(ty, len, seed), Bitmap::from((0..len).map(|_| rng.gen_bool(0.5)).collect::>()), @@ -1372,6 +1409,7 @@ impl Column { Column::String(col) => col.memory_size(), Column::Timestamp(col) => col.len() * 8, Column::Date(col) => col.len() * 4, + Column::Interval(col) => col.len() * 16, Column::Array(col) => col.values.memory_size() + col.offsets.len() * 8, Column::Map(col) => col.values.memory_size() + col.offsets.len() * 8, Column::Bitmap(col) => col.memory_size(), @@ -1397,6 +1435,7 @@ impl Column { Column::Number(NumberColumn::Int32(col)) | Column::Date(col) => col.len() * 4, 
Column::Number(NumberColumn::Int64(col)) | Column::Timestamp(col) => col.len() * 8, Column::Decimal(DecimalColumn::Decimal128(col, _)) => col.len() * 16, + Column::Interval(col) => col.len() * 16, Column::Decimal(DecimalColumn::Decimal256(col, _)) => col.len() * 32, Column::Geography(col) => GeographyType::column_memory_size(col), Column::Boolean(c) => c.len(), @@ -1495,6 +1534,7 @@ impl ColumnBuilder { Column::String(col) => ColumnBuilder::String(StringColumnBuilder::from_column(col)), Column::Timestamp(col) => ColumnBuilder::Timestamp(buffer_into_mut(col)), Column::Date(col) => ColumnBuilder::Date(buffer_into_mut(col)), + Column::Interval(col) => ColumnBuilder::Interval(buffer_into_mut(col)), Column::Array(box col) => { ColumnBuilder::Array(Box::new(ArrayColumnBuilder::from_column(col))) } @@ -1553,6 +1593,7 @@ impl ColumnBuilder { ScalarRef::String(s) => ColumnBuilder::String(StringColumnBuilder::repeat(s, n)), ScalarRef::Timestamp(d) => ColumnBuilder::Timestamp(vec![*d; n]), ScalarRef::Date(d) => ColumnBuilder::Date(vec![*d; n]), + ScalarRef::Interval(i) => ColumnBuilder::Interval(vec![*i; n]), ScalarRef::Array(col) => { ColumnBuilder::Array(Box::new(ArrayColumnBuilder::repeat(col, n))) } @@ -1591,6 +1632,7 @@ impl ColumnBuilder { ColumnBuilder::String(builder) => builder.len(), ColumnBuilder::Timestamp(builder) => builder.len(), ColumnBuilder::Date(builder) => builder.len(), + ColumnBuilder::Interval(builder) => builder.len(), ColumnBuilder::Array(builder) => builder.len(), ColumnBuilder::Map(builder) => builder.len(), ColumnBuilder::Bitmap(builder) => builder.len(), @@ -1628,6 +1670,7 @@ impl ColumnBuilder { ColumnBuilder::String(col) => col.memory_size(), ColumnBuilder::Timestamp(col) => col.len() * 8, ColumnBuilder::Date(col) => col.len() * 4, + ColumnBuilder::Interval(col) => col.len() * 16, ColumnBuilder::Array(col) => col.builder.memory_size() + col.offsets.len() * 8, ColumnBuilder::Map(col) => col.builder.memory_size() + col.offsets.len() * 8, ColumnBuilder::Bitmap(col) => col.data.len() + col.offsets.len() * 8, @@ -1656,6 +1699,7 @@ impl ColumnBuilder { ColumnBuilder::String(_) => DataType::String, ColumnBuilder::Timestamp(_) => DataType::Timestamp, ColumnBuilder::Date(_) => DataType::Date, + ColumnBuilder::Interval(_) => DataType::Interval, ColumnBuilder::Array(col) => { let inner = col.builder.data_type(); DataType::Array(Box::new(inner)) @@ -1704,6 +1748,7 @@ impl ColumnBuilder { DataType::String => ColumnBuilder::String(StringColumnBuilder::with_capacity(capacity)), DataType::Timestamp => ColumnBuilder::Timestamp(Vec::with_capacity(capacity)), DataType::Date => ColumnBuilder::Date(Vec::with_capacity(capacity)), + DataType::Interval => ColumnBuilder::Interval(Vec::with_capacity(capacity)), DataType::Nullable(ty) => ColumnBuilder::Nullable(Box::new(NullableColumnBuilder { builder: Self::with_capacity_hint(ty, capacity, enable_datasize_hint), validity: MutableBitmap::with_capacity(capacity), @@ -1782,6 +1827,9 @@ impl ColumnBuilder { } DataType::Timestamp => ColumnBuilder::Timestamp(vec![0; len]), DataType::Date => ColumnBuilder::Date(vec![0; len]), + DataType::Interval => { + ColumnBuilder::Interval(vec![months_days_micros::new(0, 0, 0); len]) + } // binary based DataType::Binary => ColumnBuilder::Binary(BinaryColumnBuilder::repeat_default(len)), @@ -1839,6 +1887,9 @@ impl ColumnBuilder { (ColumnBuilder::Date(builder), ScalarRef::Date(value)) => { DateType::push_item(builder, value) } + (ColumnBuilder::Interval(builder), ScalarRef::Interval(value)) => { + 
IntervalType::push_item(builder, value) + } (ColumnBuilder::Array(builder), ScalarRef::Array(value)) => { ArrayType::push_item(builder, value); } @@ -1891,6 +1942,9 @@ impl ColumnBuilder { (ColumnBuilder::Timestamp(builder), ScalarRef::Timestamp(value)) => { TimestampType::push_item_repeat(builder, *value, n); } + (ColumnBuilder::Interval(builder), ScalarRef::Interval(value)) => { + IntervalType::push_item_repeat(builder, *value, n); + } (ColumnBuilder::Date(builder), ScalarRef::Date(value)) => { DateType::push_item_repeat(builder, *value, n); } @@ -1944,6 +1998,7 @@ impl ColumnBuilder { ColumnBuilder::String(builder) => builder.commit_row(), ColumnBuilder::Timestamp(builder) => builder.push(0), ColumnBuilder::Date(builder) => builder.push(0), + ColumnBuilder::Interval(builder) => builder.push(months_days_micros::new(0, 0, 0)), ColumnBuilder::Array(builder) => builder.push_default(), ColumnBuilder::Map(builder) => builder.push_default(), ColumnBuilder::Bitmap(builder) => builder.commit_row(), @@ -2020,6 +2075,10 @@ impl ColumnBuilder { let value: i32 = reader.read_scalar()?; builder.push(value); } + ColumnBuilder::Interval(builder) => { + let value = months_days_micros(i128::de_binary(reader)); + builder.push(value); + } ColumnBuilder::Array(builder) => { let len = reader.read_scalar::()?; for _ in 0..len { @@ -2125,6 +2184,12 @@ impl ColumnBuilder { builder.push(value); } } + ColumnBuilder::Interval(builder) => { + for row in 0..rows { + let mut reader = &reader[step * row..]; + builder.push(months_days_micros(i128::de_binary(&mut reader))); + } + } ColumnBuilder::Array(builder) => { for row in 0..rows { let mut reader = &reader[step * row..]; @@ -2198,6 +2263,7 @@ impl ColumnBuilder { ColumnBuilder::String(builder) => builder.pop().map(Scalar::String), ColumnBuilder::Timestamp(builder) => builder.pop().map(Scalar::Timestamp), ColumnBuilder::Date(builder) => builder.pop().map(Scalar::Date), + ColumnBuilder::Interval(builder) => builder.pop().map(Scalar::Interval), ColumnBuilder::Array(builder) => builder.pop().map(Scalar::Array), ColumnBuilder::Map(builder) => builder.pop().map(Scalar::Map), ColumnBuilder::Bitmap(builder) => builder.pop().map(Scalar::Bitmap), @@ -2263,6 +2329,9 @@ impl ColumnBuilder { (ColumnBuilder::Date(builder), Column::Date(other)) => { builder.extend_from_slice(other); } + (ColumnBuilder::Interval(builder), Column::Interval(other)) => { + builder.extend_from_slice(other); + } (ColumnBuilder::Array(builder), Column::Array(other)) => { builder.append_column(other.as_ref()); } @@ -2309,6 +2378,7 @@ impl ColumnBuilder { ColumnBuilder::String(b) => Column::String(StringType::build_column(b)), ColumnBuilder::Timestamp(b) => Column::Timestamp(TimestampType::build_column(b)), ColumnBuilder::Date(b) => Column::Date(DateType::build_column(b)), + ColumnBuilder::Interval(b) => Column::Interval(IntervalType::build_column(b)), ColumnBuilder::Bitmap(b) => Column::Bitmap(BitmapType::build_column(b)), ColumnBuilder::Variant(b) => Column::Variant(VariantType::build_column(b)), ColumnBuilder::Geometry(b) => Column::Geometry(GeometryType::build_column(b)), @@ -2339,6 +2409,7 @@ impl ColumnBuilder { ColumnBuilder::String(b) => Scalar::String(StringType::build_scalar(b)), ColumnBuilder::Timestamp(b) => Scalar::Timestamp(TimestampType::build_scalar(b)), ColumnBuilder::Date(b) => Scalar::Date(DateType::build_scalar(b)), + ColumnBuilder::Interval(b) => Scalar::Interval(IntervalType::build_scalar(b)), ColumnBuilder::Bitmap(b) => Scalar::Bitmap(BitmapType::build_scalar(b)), 
ColumnBuilder::Variant(b) => Scalar::Variant(VariantType::build_scalar(b)), ColumnBuilder::Geometry(b) => Scalar::Geometry(GeometryType::build_scalar(b)), diff --git a/src/query/formats/Cargo.toml b/src/query/formats/Cargo.toml index 778df19cceac..f0fb0831c6ea 100644 --- a/src/query/formats/Cargo.toml +++ b/src/query/formats/Cargo.toml @@ -13,6 +13,7 @@ test = true [dependencies] databend-common-base = { workspace = true } +databend-common-column = { workspace = true } databend-common-exception = { workspace = true } databend-common-expression = { workspace = true } databend-common-io = { workspace = true } diff --git a/src/query/formats/src/field_decoder/fast_values.rs b/src/query/formats/src/field_decoder/fast_values.rs index 4e1a10bc64fb..3231e50c924e 100644 --- a/src/query/formats/src/field_decoder/fast_values.rs +++ b/src/query/formats/src/field_decoder/fast_values.rs @@ -22,8 +22,10 @@ use std::sync::LazyLock; use aho_corasick::AhoCorasick; use bstr::ByteSlice; +use databend_common_column::types::months_days_micros; use databend_common_exception::ErrorCode; use databend_common_exception::Result; +use databend_common_exception::ToErrorCode; use databend_common_expression::serialize::read_decimal_with_size; use databend_common_expression::serialize::uniform_date; use databend_common_expression::types::array::ArrayColumnBuilder; @@ -57,6 +59,7 @@ use databend_common_io::geography::geography_from_ewkt_bytes; use databend_common_io::parse_bitmap; use databend_common_io::parse_bytes_to_ewkb; use databend_common_io::prelude::FormatSettings; +use databend_common_io::Interval; use jsonb::parse_value; use lexical_core::FromLexical; use num::cast::AsPrimitive; @@ -154,6 +157,7 @@ impl FastFieldDecoderValues { ColumnBuilder::Geometry(c) => self.read_geometry(c, reader, positions), ColumnBuilder::Geography(c) => self.read_geography(c, reader, positions), ColumnBuilder::Binary(_) => Err(ErrorCode::Unimplemented("binary literal")), + ColumnBuilder::Interval(c) => self.read_interval(c, reader, positions), ColumnBuilder::EmptyArray { .. } | ColumnBuilder::EmptyMap { .. 
} => { Err(ErrorCode::Unimplemented("empty array/map literal")) } @@ -276,6 +280,26 @@ impl FastFieldDecoderValues { Ok(()) } + fn read_interval>( + &self, + column: &mut Vec, + reader: &mut Cursor, + positions: &mut VecDeque, + ) -> Result<()> { + let mut buf = Vec::new(); + self.read_string_inner(reader, &mut buf, positions)?; + let res = + std::str::from_utf8(buf.as_slice()).map_err_to_code(ErrorCode::BadBytes, || { + format!( + "UTF-8 Conversion Failed: Unable to convert value {:?} to UTF-8", + buf + ) + })?; + let i = Interval::from_string(res)?; + column.push(months_days_micros::new(i.months, i.days, i.micros)); + Ok(()) + } + fn read_date>( &self, column: &mut Vec, diff --git a/src/query/formats/src/field_decoder/nested.rs b/src/query/formats/src/field_decoder/nested.rs index 4398ee8e221e..950920ea2ebd 100644 --- a/src/query/formats/src/field_decoder/nested.rs +++ b/src/query/formats/src/field_decoder/nested.rs @@ -17,8 +17,10 @@ use std::io::BufRead; use std::io::Cursor; use bstr::ByteSlice; +use databend_common_column::types::months_days_micros; use databend_common_exception::ErrorCode; use databend_common_exception::Result; +use databend_common_exception::ToErrorCode; use databend_common_expression::serialize::read_decimal_with_size; use databend_common_expression::serialize::uniform_date; use databend_common_expression::types::array::ArrayColumnBuilder; @@ -50,6 +52,7 @@ use databend_common_io::cursor_ext::ReadNumberExt; use databend_common_io::geography::geography_from_ewkt_bytes; use databend_common_io::parse_bitmap; use databend_common_io::parse_bytes_to_ewkb; +use databend_common_io::Interval; use jsonb::parse_value; use lexical_core::FromLexical; @@ -128,6 +131,7 @@ impl NestedValues { DecimalColumnBuilder::DECIMAL_TYPE(c, size) => self.read_decimal(c, *size, reader), }), ColumnBuilder::Date(c) => self.read_date(c, reader), + ColumnBuilder::Interval(c) => self.read_interval(c, reader), ColumnBuilder::Timestamp(c) => self.read_timestamp(c, reader), ColumnBuilder::Binary(c) => self.read_binary(c, reader), ColumnBuilder::String(c) => self.read_string(c, reader), @@ -251,6 +255,25 @@ impl NestedValues { Ok(()) } + fn read_interval>( + &self, + column: &mut Vec, + reader: &mut Cursor, + ) -> Result<()> { + let mut buf = Vec::new(); + self.read_string_inner(reader, &mut buf)?; + let res = + std::str::from_utf8(buf.as_slice()).map_err_to_code(ErrorCode::BadBytes, || { + format!( + "UTF-8 Conversion Failed: Unable to convert value {:?} to UTF-8", + buf + ) + })?; + let i = Interval::from_string(res)?; + column.push(months_days_micros::new(i.months, i.days, i.micros)); + Ok(()) + } + fn read_timestamp>( &self, column: &mut Vec, diff --git a/src/query/formats/src/field_decoder/separated_text.rs b/src/query/formats/src/field_decoder/separated_text.rs index 0975ca0f1265..6e5cc57b81d2 100644 --- a/src/query/formats/src/field_decoder/separated_text.rs +++ b/src/query/formats/src/field_decoder/separated_text.rs @@ -16,8 +16,10 @@ use std::any::Any; use std::io::Cursor; use bstr::ByteSlice; +use databend_common_column::types::months_days_micros; use databend_common_exception::ErrorCode; use databend_common_exception::Result; +use databend_common_exception::ToErrorCode; use databend_common_expression::serialize::read_decimal_with_size; use databend_common_expression::serialize::uniform_date; use databend_common_expression::types::array::ArrayColumnBuilder; @@ -48,6 +50,7 @@ use databend_common_io::cursor_ext::ReadBytesExt; use databend_common_io::geography::geography_from_ewkt_bytes; use 
databend_common_io::parse_bitmap; use databend_common_io::parse_bytes_to_ewkb; +use databend_common_io::Interval; use databend_common_meta_app::principal::CsvFileFormatParams; use databend_common_meta_app::principal::TsvFileFormatParams; use jsonb::parse_value; @@ -144,6 +147,7 @@ impl SeparatedTextDecoder { DecimalColumnBuilder::DECIMAL_TYPE(c, size) => self.read_decimal(c, *size, data), }), ColumnBuilder::Date(c) => self.read_date(c, data), + ColumnBuilder::Interval(c) => self.read_interval(c, data), ColumnBuilder::Timestamp(c) => self.read_timestamp(c, data), ColumnBuilder::Array(c) => self.read_array(c, data), ColumnBuilder::Map(c) => self.read_map(c, data), @@ -259,6 +263,18 @@ impl SeparatedTextDecoder { Ok(()) } + fn read_interval(&self, column: &mut Vec, data: &[u8]) -> Result<()> { + let res = std::str::from_utf8(data).map_err_to_code(ErrorCode::BadBytes, || { + format!( + "UTF-8 Conversion Failed: Unable to convert value {:?} to UTF-8", + data + ) + })?; + let i = Interval::from_string(res)?; + column.push(months_days_micros::new(i.months, i.days, i.micros)); + Ok(()) + } + fn read_timestamp(&self, column: &mut Vec, data: &[u8]) -> Result<()> { let mut ts = if !data.contains(&b'-') { read_num_text_exact(data)? diff --git a/src/query/formats/src/field_encoder/csv.rs b/src/query/formats/src/field_encoder/csv.rs index 9e49bef6197c..2b5948227423 100644 --- a/src/query/formats/src/field_encoder/csv.rs +++ b/src/query/formats/src/field_encoder/csv.rs @@ -137,7 +137,11 @@ impl FieldEncoderCSV { self.string_formatter.write_string(buf.as_bytes(), out_buf); } - Column::Date(..) | Column::Timestamp(..) | Column::Bitmap(..) | Column::Variant(..) => { + Column::Date(..) + | Column::Timestamp(..) + | Column::Bitmap(..) + | Column::Variant(..) + | Column::Interval(_) => { let mut buf = Vec::new(); self.simple.write_field(column, row_index, &mut buf, false); self.string_formatter.write_string(&buf, out_buf); diff --git a/src/query/formats/src/field_encoder/json.rs b/src/query/formats/src/field_encoder/json.rs index 2549d3203e66..44fd783c5071 100644 --- a/src/query/formats/src/field_encoder/json.rs +++ b/src/query/formats/src/field_encoder/json.rs @@ -70,7 +70,10 @@ impl FieldEncoderJSON { self.write_string(buf.as_bytes(), out_buf); } - Column::Date(..) | Column::Timestamp(..) | Column::Bitmap(..) => { + Column::Date(..) + | Column::Timestamp(..) + | Column::Bitmap(..) + | Column::Interval(..) 
=> { let mut buf = Vec::new(); self.simple.write_field(column, row_index, &mut buf, false); self.write_string(&buf, out_buf); diff --git a/src/query/formats/src/field_encoder/values.rs b/src/query/formats/src/field_encoder/values.rs index f0fa3a8b2db4..90444022335e 100644 --- a/src/query/formats/src/field_encoder/values.rs +++ b/src/query/formats/src/field_encoder/values.rs @@ -14,10 +14,12 @@ use chrono_tz::Tz; use databend_common_base::base::OrderedFloat; +use databend_common_column::types::months_days_micros; use databend_common_expression::types::array::ArrayColumn; use databend_common_expression::types::date::date_to_string; use databend_common_expression::types::decimal::DecimalColumn; use databend_common_expression::types::geography::GeographyColumn; +use databend_common_expression::types::interval::interval_to_string; use databend_common_expression::types::nullable::NullableColumn; use databend_common_expression::types::string::StringColumn; use databend_common_expression::types::timestamp::timestamp_to_string; @@ -152,6 +154,7 @@ impl FieldEncoderValues { Column::Binary(c) => self.write_binary(c, row_index, out_buf), Column::String(c) => self.write_string(c, row_index, out_buf, in_nested), Column::Date(c) => self.write_date(c, row_index, out_buf, in_nested), + Column::Interval(c) => self.write_interval(c, row_index, out_buf, in_nested), Column::Timestamp(c) => self.write_timestamp(c, row_index, out_buf, in_nested), Column::Bitmap(b) => self.write_bitmap(b, row_index, out_buf, in_nested), Column::Variant(c) => self.write_variant(c, row_index, out_buf, in_nested), @@ -276,6 +279,18 @@ impl FieldEncoderValues { self.write_string_inner(s.as_bytes(), out_buf, in_nested); } + fn write_interval( + &self, + column: &Buffer, + row_index: usize, + out_buf: &mut Vec, + in_nested: bool, + ) { + let v = unsafe { column.get_unchecked(row_index) }; + let s = interval_to_string(v).to_string(); + self.write_string_inner(s.as_bytes(), out_buf, in_nested); + } + fn write_timestamp( &self, column: &Buffer, diff --git a/src/query/formats/src/lib.rs b/src/query/formats/src/lib.rs index 2d17e3e494e9..3dfc23bad0b1 100644 --- a/src/query/formats/src/lib.rs +++ b/src/query/formats/src/lib.rs @@ -16,6 +16,8 @@ #![feature(box_patterns)] #![feature(cursor_split)] +extern crate core; + mod binary; mod clickhouse; mod common_settings; diff --git a/src/query/formats/src/output_format/json.rs b/src/query/formats/src/output_format/json.rs index b575617fe49e..8abba0612eac 100644 --- a/src/query/formats/src/output_format/json.rs +++ b/src/query/formats/src/output_format/json.rs @@ -13,6 +13,7 @@ // limitations under the License. 
use databend_common_expression::date_helper::DateConverter; +use databend_common_expression::types::interval::interval_to_string; use databend_common_expression::types::number::NumberScalar; use databend_common_expression::DataBlock; use databend_common_expression::ScalarRef; @@ -99,6 +100,7 @@ fn scalar_to_json(s: ScalarRef<'_>, format: &FormatSettings) -> JsonValue { let dt = DateConverter::to_date(&v, format.jiff_timezone.clone()); serde_json::to_value(strtime::format("%Y-%m-%d", dt).unwrap()).unwrap() } + ScalarRef::Interval(v) => serde_json::to_value(interval_to_string(&v).to_string()).unwrap(), ScalarRef::Timestamp(v) => { let dt = DateConverter::to_timestamp(&v, format.jiff_timezone.clone()); serde_json::to_value(strtime::format("%Y-%m-%d %H:%M:%S", &dt).unwrap()).unwrap() diff --git a/src/query/functions/Cargo.toml b/src/query/functions/Cargo.toml index 60fcc19f1fb1..0feb05a9e1de 100644 --- a/src/query/functions/Cargo.toml +++ b/src/query/functions/Cargo.toml @@ -20,6 +20,7 @@ crc32fast = { workspace = true } ctor = { workspace = true } databend-common-base = { workspace = true } +databend-common-column = { workspace = true } databend-common-exception = { workspace = true } databend-common-expression = { workspace = true } databend-common-hashtable = { workspace = true } diff --git a/src/query/functions/src/scalars/interval.rs b/src/query/functions/src/scalars/interval.rs new file mode 100644 index 000000000000..8e086c34501c --- /dev/null +++ b/src/query/functions/src/scalars/interval.rs @@ -0,0 +1,81 @@ +// Copyright 2021 Datafuse Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
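+// Registers the interval scalar functions: `to_interval` / `try_to_interval` parse an interval string into a `months_days_micros` value, and `to_string` renders an interval back as a string.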
+ +use databend_common_column::types::months_days_micros; +use databend_common_expression::error_to_null; +use databend_common_expression::types::interval::interval_to_string; +use databend_common_expression::types::interval::string_to_interval; +use databend_common_expression::types::IntervalType; +use databend_common_expression::types::NullableType; +use databend_common_expression::types::StringType; +use databend_common_expression::vectorize_with_builder_1_arg; +use databend_common_expression::EvalContext; +use databend_common_expression::FunctionDomain; +use databend_common_expression::FunctionRegistry; +use databend_common_expression::Value; + +pub fn register(registry: &mut FunctionRegistry) { + // cast(xx AS interval) + // to_interval(xx) + register_string_to_interval(registry); + register_interval_to_string(registry); +} + +fn register_string_to_interval(registry: &mut FunctionRegistry) { + registry.register_passthrough_nullable_1_arg::( + "to_interval", + |_, _| FunctionDomain::MayThrow, + eval_string_to_interval, + ); + registry.register_combine_nullable_1_arg::( + "try_to_interval", + |_, _| FunctionDomain::Full, + error_to_null(eval_string_to_interval), + ); + + fn eval_string_to_interval( + val: Value, + ctx: &mut EvalContext, + ) -> Value { + vectorize_with_builder_1_arg::(|val, output, ctx| { + match string_to_interval(val) { + Ok(interval) => output.push(months_days_micros::new( + interval.months, + interval.days, + interval.micros, + )), + Err(e) => { + ctx.set_error( + output.len(), + format!("cannot parse to type `INTERVAL`. {}", e), + ); + output.push(months_days_micros::new(0, 0, 0)); + } + } + })(val, ctx) + } +} + +fn register_interval_to_string(registry: &mut FunctionRegistry) { + registry.register_combine_nullable_1_arg::( + "to_string", + |_, _| FunctionDomain::MayThrow, + vectorize_with_builder_1_arg::>( + |interval, output, _| { + let res = interval_to_string(&interval).to_string(); + output.push(&res); + }, + ), + ); +} diff --git a/src/query/functions/src/scalars/mod.rs b/src/query/functions/src/scalars/mod.rs index 3f2cb8418d7a..2a598ec23c5b 100644 --- a/src/query/functions/src/scalars/mod.rs +++ b/src/query/functions/src/scalars/mod.rs @@ -30,6 +30,7 @@ mod geography; mod geometry; mod hash; mod hilbert; +mod interval; mod map; mod math; mod other; @@ -66,4 +67,5 @@ pub fn register(registry: &mut FunctionRegistry) { geometry::register(registry); geography::register(registry); hilbert::register(registry); + interval::register(registry); } diff --git a/src/query/functions/tests/it/scalars/parser.rs b/src/query/functions/tests/it/scalars/parser.rs index 238b4074942c..d7aee76be57f 100644 --- a/src/query/functions/tests/it/scalars/parser.rs +++ b/src/query/functions/tests/it/scalars/parser.rs @@ -608,6 +608,7 @@ fn transform_data_type(target_type: databend_common_ast::ast::TypeName) -> DataT databend_common_ast::ast::TypeName::String => DataType::String, databend_common_ast::ast::TypeName::Timestamp => DataType::Timestamp, databend_common_ast::ast::TypeName::Date => DataType::Date, + databend_common_ast::ast::TypeName::Interval => DataType::Interval, databend_common_ast::ast::TypeName::Array(item_type) => { DataType::Array(Box::new(transform_data_type(*item_type))) } diff --git a/src/query/functions/tests/it/scalars/testdata/function_list.txt b/src/query/functions/tests/it/scalars/testdata/function_list.txt index 81ac104ff9fc..417e0721b792 100644 --- a/src/query/functions/tests/it/scalars/testdata/function_list.txt +++ 
b/src/query/functions/tests/it/scalars/testdata/function_list.txt @@ -4009,6 +4009,8 @@ Functions overloads: 23 to_int8(Float64 NULL) :: Int8 NULL 24 to_int8(Boolean) :: Int8 25 to_int8(Boolean NULL) :: Int8 NULL +0 to_interval(String) :: Interval +1 to_interval(String NULL) :: Interval NULL 0 to_last_of_month(Date) :: Date 1 to_last_of_month(Date NULL) :: Date NULL 2 to_last_of_month(Timestamp) :: Date @@ -4174,6 +4176,8 @@ Functions overloads: 34 to_string(Bitmap NULL) :: String NULL 35 to_string(Geometry) :: String 36 to_string(Geometry NULL) :: String NULL +37 to_string(Interval) :: String NULL +38 to_string(Interval NULL) :: String NULL 0 to_timestamp(Variant) :: Timestamp 1 to_timestamp(Variant NULL) :: Timestamp NULL 2 to_timestamp(String) :: Timestamp @@ -4604,6 +4608,8 @@ Functions overloads: 23 try_to_int8(Float64 NULL) :: Int8 NULL 24 try_to_int8(Boolean) :: Int8 NULL 25 try_to_int8(Boolean NULL) :: Int8 NULL +0 try_to_interval(String) :: Interval NULL +1 try_to_interval(String NULL) :: Interval NULL 0 try_to_string(Variant) :: String NULL 1 try_to_string(Variant NULL) :: String NULL 2 try_to_string(UInt8) :: String NULL diff --git a/src/query/sql/src/planner/semantic/type_check.rs b/src/query/sql/src/planner/semantic/type_check.rs index 7c5fc5c474ab..cccedda5e664 100644 --- a/src/query/sql/src/planner/semantic/type_check.rs +++ b/src/query/sql/src/planner/semantic/type_check.rs @@ -5139,6 +5139,7 @@ pub fn resolve_type_name(type_name: &TypeName, not_null: bool) -> Result TableDataType::Bitmap, + TypeName::Interval => TableDataType::Interval, TypeName::Tuple { fields_type, fields_name, diff --git a/src/query/storages/common/stage/src/read/cast.rs b/src/query/storages/common/stage/src/read/cast.rs index c8e93c000750..152ab9ac6819 100644 --- a/src/query/storages/common/stage/src/read/cast.rs +++ b/src/query/storages/common/stage/src/read/cast.rs @@ -140,6 +140,10 @@ pub fn load_can_auto_cast_to(from_type: &DataType, to_type: &DataType) -> bool { (String | Binary | Variant, Geometry) => true, (_, Geometry) => false, + // [specificity] + (String, Interval) => true, + (_, Interval) => false, + // TODO: // (String | Binary | Variant, Geography) => true, (_, Geography) => false, diff --git a/src/query/storages/fuse/src/table_functions/clustering_information.rs b/src/query/storages/fuse/src/table_functions/clustering_information.rs index a624d81ffabe..1f153b2e1b1a 100644 --- a/src/query/storages/fuse/src/table_functions/clustering_information.rs +++ b/src/query/storages/fuse/src/table_functions/clustering_information.rs @@ -557,6 +557,9 @@ fn domain_to_minmax(domain: &Domain) -> (Scalar, Scalar) { (Scalar::Timestamp(*min), Scalar::Timestamp(*max)) } Domain::Date(SimpleDomain { min, max }) => (Scalar::Date(*min), Scalar::Date(*max)), + Domain::Interval(SimpleDomain { min, max }) => { + (Scalar::Interval(*min), Scalar::Interval(*max)) + } Domain::Nullable(NullableDomain { has_null, value }) => { if let Some(v) = value { let (min, mut max) = domain_to_minmax(v); diff --git a/src/tests/sqlsmith/src/sql_gen/ddl.rs b/src/tests/sqlsmith/src/sql_gen/ddl.rs index 14517694fbcb..9897829ebfe9 100644 --- a/src/tests/sqlsmith/src/sql_gen/ddl.rs +++ b/src/tests/sqlsmith/src/sql_gen/ddl.rs @@ -301,5 +301,9 @@ fn gen_default_expr(type_name: &TypeName) -> Expr { value: Literal::Null, }, TypeName::NotNull(box ty) => gen_default_expr(ty), + TypeName::Interval => Expr::Literal { + span: None, + value: Literal::String("1 month 1 hour".to_string()), + }, } } diff --git 
a/tests/sqllogictests/suites/query/functions/02_0079_function_interval.test b/tests/sqllogictests/suites/query/functions/02_0079_function_interval.test new file mode 100644 index 000000000000..e2a98b40f6c4 --- /dev/null +++ b/tests/sqllogictests/suites/query/functions/02_0079_function_interval.test @@ -0,0 +1,24 @@ +onlyif http +statement ok +create or replace table t(c1 interval, c2 interval); + +onlyif http +statement ok +insert into t values('1 year 1 month ago', '1'),('1 month 1 hour ago', '1000'); + +onlyif http +query TT +select * from t order by c1; +---- +-1 year -1 month 0:00:00.000001 +-1 month -1 day -1:00:00 0:00:00.001 + +onlyif http +statement error 1006 +select to_interval('1 month 1 hour ago 1 micros'); + +onlyif http +query T +select to_interval('1 month 1 hour 1 microsecond'); +---- +1 month 1:00:00.000001 From d577e76cc932943a816ef9e58e5a02ff5f7b7e62 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=A0=E7=82=8E=E6=B3=BC?= Date: Mon, 16 Dec 2024 14:24:41 +0800 Subject: [PATCH 2/2] feat: add key prefix count assertions to transactions (#17053) Enable transaction conditions based on the number of keys matching a prefix. Example: ```rust let txn = TxnRequest { condition: vec![ TxnCondition::match_keys_with_prefix("key/", Eq, 3) ], //... } ``` This allows transactions to proceed only when a prefix matches an expected number of keys, providing atomic prefix-based cardinality checks. This commit involves a databend-meta server side change: - Add `txn_condition::Target::KeysWithPrefix`. To provide compatibility, any change to the client that uses this feature must update the compatibility doc and upgrade the databend-meta cluster first. --- src/meta/client/src/lib.rs | 7 +- src/meta/kvapi/src/kvapi/test_suite.rs | 157 ++++++++++++++++-------- src/meta/raft-store/src/applier.rs | 71 ++++++----- src/meta/types/proto/meta.proto | 11 +- src/meta/types/src/cmd/mod.rs | 79 ++++++++++++ src/meta/types/src/proto_display.rs | 3 + src/meta/types/src/proto_ext/txn_ext.rs | 37 ++++-- 7 files changed, 270 insertions(+), 95 deletions(-) diff --git a/src/meta/client/src/lib.rs b/src/meta/client/src/lib.rs index 7a6ed44a5c56..7b6ffc037afc 100644 --- a/src/meta/client/src/lib.rs +++ b/src/meta/client/src/lib.rs @@ -110,9 +110,14 @@ pub static METACLI_COMMIT_SEMVER: LazyLock = LazyLock::new(|| { /// require the client to call kv_read_v1 for get/mget/list, /// which is added `2024-01-07: since 1.2.287` /// -/// - 2024-11-2*: since 1.2.6** +/// - 2024-11-23: since 1.2.663 /// 👥 client: remove use of `Operation::AsIs` /// +/// - 2024-12-1*: since 1.2.* +/// 🖥 server: add `txn_condition::Target::KeysWithPrefix`, +/// to support matching the key count by a prefix. 
+/// +/// /// Server feature set: /// ```yaml /// server_features: diff --git a/src/meta/kvapi/src/kvapi/test_suite.rs b/src/meta/kvapi/src/kvapi/test_suite.rs index ddd2210902bd..a5b113f35d6f 100644 --- a/src/meta/kvapi/src/kvapi/test_suite.rs +++ b/src/meta/kvapi/src/kvapi/test_suite.rs @@ -92,6 +92,8 @@ impl kvapi::TestSuite { self.kv_transaction_with_ttl(&builder.build().await).await?; self.kv_transaction_delete_match_seq_none(&builder.build().await) .await?; + self.kv_transaction_condition_keys_with_prefix(&builder.build().await) + .await?; self.kv_transaction_delete_match_seq_some_not_match(&builder.build().await) .await?; self.kv_transaction_delete_match_seq_some_match(&builder.build().await) @@ -111,7 +113,7 @@ impl kvapi::TestSuite { // write let res = kv.upsert_kv(UpsertKV::update("foo", b"bar")).await?; assert_eq!(None, res.prev); - assert_eq!(Some(SeqV::with_meta(1, None, b"bar".to_vec())), res.result); + assert_eq!(Some(SeqV::new(1, b("bar"))), res.result); } { @@ -120,10 +122,7 @@ impl kvapi::TestSuite { .upsert_kv(UpsertKV::update("foo", b"bar").with(MatchSeq::Exact(2))) .await?; assert_eq!( - ( - Some(SeqV::with_meta(1, None, b"bar".to_vec())), - Some(SeqV::with_meta(1, None, b"bar".to_vec())), - ), + (Some(SeqV::new(1, b("bar"))), Some(SeqV::new(1, b("bar"))),), (res.prev, res.result), "nothing changed" ); @@ -134,16 +133,8 @@ impl kvapi::TestSuite { let res = kv .upsert_kv(UpsertKV::update("foo", b"wow").with(MatchSeq::Exact(1))) .await?; - assert_eq!( - Some(SeqV::with_meta(1, None, b"bar".to_vec())), - res.prev, - "old value" - ); - assert_eq!( - Some(SeqV::with_meta(2, None, b"wow".to_vec())), - res.result, - "new value" - ); + assert_eq!(Some(SeqV::new(1, b("bar"))), res.prev, "old value"); + assert_eq!(Some(SeqV::new(2, b("wow"))), res.result, "new value"); } Ok(()) @@ -194,10 +185,7 @@ impl kvapi::TestSuite { let res = kv.upsert_kv(UpsertKV::delete(test_key)).await?; // dbg!("delete", &res); - assert_eq!( - (Some(SeqV::with_meta(2, None, b"v2".to_vec())), None), - (res.prev, res.result) - ); + assert_eq!((Some(SeqV::new(2, b("v2"))), None), (res.prev, res.result)); Ok(()) } @@ -213,38 +201,35 @@ impl kvapi::TestSuite { assert_eq!((None, None), (r.prev, r.result), "not changed"); let r = kv.upsert_kv(UpsertKV::update(test_key, b"v1")).await?; - assert_eq!(Some(SeqV::with_meta(1, None, b"v1".to_vec())), r.result); + assert_eq!(Some(SeqV::new(1, b("v1"))), r.result); let seq = r.result.unwrap().seq; // unmatched seq let r = kv .upsert_kv(UpsertKV::update(test_key, b"v2").with(MatchSeq::Exact(seq + 1))) .await?; - assert_eq!(Some(SeqV::with_meta(1, None, b"v1".to_vec())), r.prev); - assert_eq!(Some(SeqV::with_meta(1, None, b"v1".to_vec())), r.result); + assert_eq!(Some(SeqV::new(1, b("v1"))), r.prev); + assert_eq!(Some(SeqV::new(1, b("v1"))), r.result); // matched seq let r = kv .upsert_kv(UpsertKV::update(test_key, b"v2").with(MatchSeq::Exact(seq))) .await?; - assert_eq!(Some(SeqV::with_meta(1, None, b"v1".to_vec())), r.prev); - assert_eq!(Some(SeqV::with_meta(2, None, b"v2".to_vec())), r.result); + assert_eq!(Some(SeqV::new(1, b("v1"))), r.prev); + assert_eq!(Some(SeqV::new(2, b("v2"))), r.result); // blind update let r = kv .upsert_kv(UpsertKV::update(test_key, b"v3").with(MatchSeq::GE(1))) .await?; - assert_eq!(Some(SeqV::with_meta(2, None, b"v2".to_vec())), r.prev); - assert_eq!(Some(SeqV::with_meta(3, None, b"v3".to_vec())), r.result); + assert_eq!(Some(SeqV::new(2, b("v2"))), r.prev); + assert_eq!(Some(SeqV::new(3, b("v3"))), r.result); // value updated let 
key_value = kv.get_kv(test_key).await?; assert!(key_value.is_some()); let key_value = key_value.unwrap(); - assert_eq!( - key_value, - SeqV::with_meta(key_value.seq, None, b"v3".to_vec()) - ); + assert_eq!(key_value, SeqV::new(key_value.seq, b("v3"))); Ok(()) } @@ -380,7 +365,7 @@ impl kvapi::TestSuite { let now_sec = SeqV::<()>::now_sec(); let r = kv.upsert_kv(UpsertKV::update(test_key, b"v1")).await?; - assert_eq!(Some(SeqV::with_meta(1, None, b"v1".to_vec())), r.result); + assert_eq!(Some(SeqV::new(1, b("v1"))), r.result); let seq = r.result.unwrap().seq; info!("--- mismatching seq does nothing"); @@ -389,12 +374,12 @@ impl kvapi::TestSuite { .upsert_kv(UpsertKV::new( test_key, MatchSeq::Exact(seq + 1), - Operation::Update(b"v1".to_vec()), + Operation::Update(b("v1")), Some(MetaSpec::new_ttl(Duration::from_secs(20))), )) .await?; - assert_eq!(Some(SeqV::with_meta(1, None, b"v1".to_vec())), r.prev); - assert_eq!(Some(SeqV::with_meta(1, None, b"v1".to_vec())), r.result); + assert_eq!(Some(SeqV::new(1, b("v1"))), r.prev); + assert_eq!(Some(SeqV::new(1, b("v1"))), r.result); info!("--- matching seq only update meta"); @@ -402,11 +387,11 @@ impl kvapi::TestSuite { .upsert_kv(UpsertKV::new( test_key, MatchSeq::Exact(seq), - Operation::Update(b"v1".to_vec()), + Operation::Update(b("v1")), Some(MetaSpec::new_ttl(Duration::from_secs(20))), )) .await?; - assert_eq!(Some(SeqV::with_meta(1, None, b"v1".to_vec())), r.prev); + assert_eq!(Some(SeqV::new(1, b("v1"))), r.prev); { let res = r.result.unwrap(); @@ -478,15 +463,15 @@ impl kvapi::TestSuite { let res = kv.mget_kv(&["k1".to_string(), "k2".to_string()]).await?; assert_eq!(res, vec![ - Some(SeqV::with_meta(1, None, b"v1".to_vec(),)), + Some(SeqV::new(1, b("v1"),)), // NOTE, the sequence number is increased globally (inside the namespace of generic kv) - Some(SeqV::with_meta(2, None, b"v2".to_vec(),)), + Some(SeqV::new(2, b("v2"),)), ]); let res = kv .mget_kv(&["k1".to_string(), "key_no exist".to_string()]) .await?; - assert_eq!(res, vec![Some(SeqV::new(1, b"v1".to_vec())), None]); + assert_eq!(res, vec![Some(SeqV::new(1, b("v1"))), None]); Ok(()) } @@ -663,7 +648,7 @@ impl kvapi::TestSuite { // first case: get and set one key transaction { let k1 = "txn_1_K1"; - let val1 = b"v1".to_vec(); + let val1 = b("v1"); // first insert k1 value kv.upsert_kv(UpsertKV::update(k1, &val1)).await?; @@ -740,11 +725,11 @@ impl kvapi::TestSuite { // 3rd case: get two key and set both key transaction { let k1 = "txn_3_K1"; - let val1 = b"v1".to_vec(); - let val1_new = b"v1_new".to_vec(); + let val1 = b("v1"); + let val1_new = b("v1_new"); let k2 = "txn_3_K2"; - let val2 = b"v1".to_vec(); + let val2 = b("v1"); // first insert k1 and k2 value kv.upsert_kv(UpsertKV::update(k1, &val1)).await?; @@ -1095,6 +1080,78 @@ impl kvapi::TestSuite { Ok(()) } + /// A transaction that checks the number of keys with given prefix. 
+ pub async fn kv_transaction_condition_keys_with_prefix( + &self, + kv: &KV, + ) -> anyhow::Result<()> { + let prefix = func_name!(); + + let sample_keys_prefix = format!("{}/xxx", prefix); + + let sample = |suffix| format!("{}/{}", sample_keys_prefix, suffix); + let positive = format!("{prefix}/positive"); + let negative = format!("{prefix}/negative"); + + kv.upsert_kv(UpsertKV::update(sample("a"), &b("a"))).await?; + kv.upsert_kv(UpsertKV::update(sample("b"), &b("b"))).await?; + kv.upsert_kv(UpsertKV::update(sample("c"), &b("c"))).await?; + + use ConditionResult::*; + + // A transaction that set positive key if succeeded, + // otherwise set the negative key. + let txn = |op: ConditionResult, n: u64| TxnRequest { + condition: vec![TxnCondition::match_keys_with_prefix( + &sample_keys_prefix, + op, + n, + )], + if_then: vec![TxnOp::put(&positive, b(format!("{op:?}")))], + else_then: vec![TxnOp::put(&negative, b(format!("{op:?}")))], + }; + + for (op, n, expected) in [ + (Eq, 2, false), + (Eq, 3, true), + (Eq, 4, false), + (Ne, 2, true), + (Ne, 3, false), + (Ne, 4, true), + (Lt, 3, false), + (Lt, 4, true), + (Lt, 5, true), + (Le, 2, false), + (Le, 3, true), + (Le, 4, true), + (Gt, 2, true), + (Gt, 3, false), + (Gt, 4, false), + (Ge, 2, true), + (Ge, 3, true), + (Ge, 4, false), + ] { + kv.upsert_kv(UpsertKV::update(&positive, &b(""))).await?; + kv.upsert_kv(UpsertKV::update(&negative, &b(""))).await?; + + let resp = kv.transaction(txn(op, n)).await?; + assert_eq!( + resp.success, expected, + "case: {op:?} {n}, expected: {expected}" + ); + + let expected_key = if expected { &positive } else { &negative }; + let got = kv.get_kv(expected_key).await?.unwrap().data; + assert_eq!( + got, + b(format!("{op:?}")), + "case: {op:?} {n}, expected: {expected}" + ); + } + + Ok(()) + } + /// If `TxnDeleteRequest.match_seq` is not set, /// the delete operation will always be executed. pub async fn kv_transaction_delete_match_seq_none( @@ -1103,7 +1160,7 @@ impl kvapi::TestSuite { ) -> anyhow::Result<()> { info!("--- {}", func_name!()); let key = || "txn_1_K1".to_string(); - let val = || b"v1".to_vec(); + let val = || b("v1"); kv.upsert_kv(UpsertKV::update(key(), &val())).await?; @@ -1137,7 +1194,7 @@ impl kvapi::TestSuite { ) -> anyhow::Result<()> { info!("--- {}", func_name!()); let key = || "txn_1_K1".to_string(); - let val = || b"v1".to_vec(); + let val = || b("v1"); kv.upsert_kv(UpsertKV::update(key(), &val())).await?; @@ -1175,7 +1232,7 @@ impl kvapi::TestSuite { ) -> anyhow::Result<()> { info!("--- {}", func_name!()); let key = || "txn_1_K1".to_string(); - let val = || b"v1".to_vec(); + let val = || b("v1"); kv.upsert_kv(UpsertKV::update(key(), &val())).await?; @@ -1231,7 +1288,7 @@ impl kvapi::TestSuite { { let res = kv2.get_kv("t").await?; let res = res.unwrap(); - assert_eq!(b"t".to_vec(), res.data); + assert_eq!(b("t"), res.data); } info!("--- test mget on other node"); @@ -1243,7 +1300,7 @@ impl kvapi::TestSuite { Some(SeqV { seq: 11, meta: None, - data: b"v".to_vec() + data: b("v") }) ], res @@ -1267,6 +1324,6 @@ impl kvapi::TestSuite { } } -fn b(s: &str) -> Vec { - s.as_bytes().to_vec() +fn b(x: impl ToString) -> Vec { + x.to_string().as_bytes().to_vec() } diff --git a/src/meta/raft-store/src/applier.rs b/src/meta/raft-store/src/applier.rs index 2b47f6a6078d..6bfa2dbd40d6 100644 --- a/src/meta/raft-store/src/applier.rs +++ b/src/meta/raft-store/src/applier.rs @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
+use std::future::ready; use std::io; use std::time::Duration; @@ -22,7 +23,7 @@ use databend_common_meta_types::raft_types::EntryPayload; use databend_common_meta_types::raft_types::StoredMembership; use databend_common_meta_types::seq_value::SeqV; use databend_common_meta_types::seq_value::SeqValue; -use databend_common_meta_types::txn_condition; +use databend_common_meta_types::txn_condition::Target; use databend_common_meta_types::txn_op; use databend_common_meta_types::txn_op_response; use databend_common_meta_types::AppliedState; @@ -49,9 +50,11 @@ use databend_common_meta_types::TxnRequest; use databend_common_meta_types::UpsertKV; use databend_common_meta_types::With; use futures::stream::TryStreamExt; +use futures_util::StreamExt; use log::debug; use log::error; use log::info; +use log::warn; use num::FromPrimitive; use crate::state_machine_api::StateMachineApi; @@ -299,48 +302,52 @@ where SM: StateMachineApi + 'static seqv.value() ); - let target = if let Some(target) = &cond.target { - target - } else { + let op = FromPrimitive::from_i32(cond.expected); + let Some(op) = op else { + warn!( + "Invalid condition: {}; TxnCondition: {}", + cond.expected, cond + ); return Ok(false); }; - let positive = match target { - txn_condition::Target::Seq(right) => { - Self::eval_seq_condition(seqv.seq(), cond.expected, right) - } - txn_condition::Target::Value(right) => { - if let Some(v) = seqv.value() { - Self::eval_value_condition(v, cond.expected, right) + let Some(against) = &cond.target else { + return Ok(false); + }; + + let positive = match against { + Target::Seq(against_seq) => Self::eval_compare(seqv.seq(), op, *against_seq), + Target::Value(against_value) => { + if let Some(stored) = seqv.value() { + Self::eval_compare(stored, op, against_value) } else { false } } + Target::KeysWithPrefix(against_n) => { + let against_n = *against_n; + + let strm = self.sm.list_kv(key).await?; + // Taking at most `against_n + 1` keys is just enough for every predicate. 
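+ // E.g. to decide `Eq 3` it suffices to know whether the real count is below, exactly at, or above 3, and taking 4 keys already settles that, so the whole prefix range never has to be scanned.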
+ let strm = strm.take((against_n + 1) as usize); + let count: u64 = strm.try_fold(0, |acc, _| ready(Ok(acc + 1))).await?; + + Self::eval_compare(count, op, against_n) + } }; Ok(positive) } - fn eval_seq_condition(left: u64, op: i32, right: &u64) -> bool { - match FromPrimitive::from_i32(op) { - Some(ConditionResult::Eq) => left == *right, - Some(ConditionResult::Gt) => left > *right, - Some(ConditionResult::Lt) => left < *right, - Some(ConditionResult::Ne) => left != *right, - Some(ConditionResult::Ge) => left >= *right, - Some(ConditionResult::Le) => left <= *right, - _ => false, - } - } - - fn eval_value_condition(left: &Vec, op: i32, right: &Vec) -> bool { - match FromPrimitive::from_i32(op) { - Some(ConditionResult::Eq) => left == right, - Some(ConditionResult::Gt) => left > right, - Some(ConditionResult::Lt) => left < right, - Some(ConditionResult::Ne) => left != right, - Some(ConditionResult::Ge) => left >= right, - Some(ConditionResult::Le) => left <= right, - _ => false, + fn eval_compare(left: T, op: ConditionResult, right: T) -> bool + where T: PartialOrd + PartialEq { + use ConditionResult::*; + match op { + Eq => left == right, + Gt => left > right, + Lt => left < right, + Ne => left != right, + Ge => left >= right, + Le => left <= right, } } diff --git a/src/meta/types/proto/meta.proto b/src/meta/types/proto/meta.proto index 90d830d4e8cc..ade20569ec7a 100644 --- a/src/meta/types/proto/meta.proto +++ b/src/meta/types/proto/meta.proto @@ -111,10 +111,17 @@ message TxnCondition { string key = 1; oneof target { - // used when compare value + // Compare the stored value of `key` against the given value. bytes value = 2; - // used when compare seq + + // Compare the stored seq of `key` against the given seq. uint64 seq = 3; + + // Compare the count of keys having the prefix `key` against the given value. + // + // Usually when using this option, append a slash `/` to the end of the prefix `key`. + // For example, if you want to count the keys with prefix `foo`, you should use `foo/` as the `key`. 
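+ // The count is compared using the same `expected` operator as the other targets, e.g. `expected = EQ` with `keys_with_prefix = 3` holds only when exactly three keys start with the prefix.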
+ uint64 keys_with_prefix = 5; } // the expected result of condition, if `expected` match the condition result, diff --git a/src/meta/types/src/cmd/mod.rs b/src/meta/types/src/cmd/mod.rs index ffb46d54ffbd..8aa4c90ab2fb 100644 --- a/src/meta/types/src/cmd/mod.rs +++ b/src/meta/types/src/cmd/mod.rs @@ -78,3 +78,82 @@ impl fmt::Display for Cmd { } } } + +#[cfg(test)] +mod tests { + use std::time::Duration; + + use crate::Endpoint; + use crate::TxnCondition; + use crate::TxnOp; + use crate::TxnRequest; + use crate::UpsertKV; + + #[test] + fn test_serde() -> anyhow::Result<()> { + // AddNode, override = true + let cmd = super::Cmd::AddNode { + node_id: 1, + node: super::Node::new("n1", Endpoint::new("e1", 12)), + overriding: true, + }; + + let want = r#"{"AddNode":{"node_id":1,"node":{"name":"n1","endpoint":{"addr":"e1","port":12},"grpc_api_advertise_address":null},"overriding":true}}"#; + + assert_eq!(want, serde_json::to_string(&cmd)?); + assert_eq!(cmd, serde_json::from_str(want)?); + + // AddNode, override = false + let cmd = super::Cmd::AddNode { + node_id: 1, + node: super::Node::new("n1", Endpoint::new("e1", 12)), + overriding: false, + }; + + let want = r#"{"AddNode":{"node_id":1,"node":{"name":"n1","endpoint":{"addr":"e1","port":12},"grpc_api_advertise_address":null},"overriding":false}}"#; + assert_eq!(want, serde_json::to_string(&cmd)?); + assert_eq!(cmd, serde_json::from_str(want)?); + + // Decode from absent override field + let want = r#"{"AddNode":{"node_id":1,"node":{"name":"n1","endpoint":{"addr":"e1","port":12},"grpc_api_advertise_address":null}}}"#; + assert_eq!(cmd, serde_json::from_str(want)?); + + // RemoveNode + let cmd = super::Cmd::RemoveNode { node_id: 1 }; + let want = r#"{"RemoveNode":{"node_id":1}}"#; + assert_eq!(want, serde_json::to_string(&cmd)?); + assert_eq!(cmd, serde_json::from_str(want)?); + + // UpsertKV + let cmd = super::Cmd::UpsertKV(UpsertKV::insert("k", b"v")); + let want = r#"{"UpsertKV":{"key":"k","seq":{"Exact":0},"value":{"Update":[118]},"value_meta":null}}"#; + assert_eq!(want, serde_json::to_string(&cmd)?); + assert_eq!(cmd, serde_json::from_str(want)?); + + // Transaction + let cmd = super::Cmd::Transaction(TxnRequest { + condition: vec![TxnCondition::eq_value("k", b("v"))], + if_then: vec![TxnOp::put_with_ttl( + "k", + b("v"), + Some(Duration::from_millis(100)), + )], + else_then: vec![], + }); + let want = concat!( + r#"{"Transaction":{"#, + r#""condition":[{"key":"k","expected":0,"target":{"Value":[118]}}],"#, + r#""if_then":[{"request":{"Put":{"key":"k","value":[118],"prev_value":true,"expire_at":null,"ttl_ms":100}}}],"#, + r#""else_then":[]"#, + r#"}}"# + ); + assert_eq!(want, serde_json::to_string(&cmd)?); + assert_eq!(cmd, serde_json::from_str(want)?); + + Ok(()) + } + + fn b(x: impl ToString) -> Vec { + x.to_string().into_bytes() + } +} diff --git a/src/meta/types/src/proto_display.rs b/src/meta/types/src/proto_display.rs index e76311dcde39..17bd20d69d91 100644 --- a/src/meta/types/src/proto_display.rs +++ b/src/meta/types/src/proto_display.rs @@ -201,6 +201,9 @@ impl Display for Target { Target::Seq(seq) => { write!(f, "seq({})", seq) } + Target::KeysWithPrefix(n) => { + write!(f, "keys_with_prefix({})", n) + } } } } diff --git a/src/meta/types/src/proto_ext/txn_ext.rs b/src/meta/types/src/proto_ext/txn_ext.rs index 09de0b4d292e..9ef3c5bde9f7 100644 --- a/src/meta/types/src/proto_ext/txn_ext.rs +++ b/src/meta/types/src/proto_ext/txn_ext.rs @@ -14,6 +14,9 @@ use std::time::Duration; +use pb::txn_condition::ConditionResult; +use 
pb::txn_condition::Target; + use crate::protobuf as pb; use crate::seq_value::SeqV; use crate::TxnRequest; @@ -33,31 +36,45 @@ impl TxnRequest { impl pb::TxnCondition { /// Create a txn condition that checks if the `seq` matches. pub fn eq_seq(key: impl ToString, seq: u64) -> Self { - Self::match_seq(key, pb::txn_condition::ConditionResult::Eq, seq) + Self::match_seq(key, ConditionResult::Eq, seq) } /// Create a txn condition that checks if the `seq` matches. - pub fn match_seq(key: impl ToString, op: pb::txn_condition::ConditionResult, seq: u64) -> Self { + pub fn match_seq(key: impl ToString, op: ConditionResult, seq: u64) -> Self { Self { key: key.to_string(), expected: op as i32, - target: Some(pb::txn_condition::Target::Seq(seq)), + target: Some(Target::Seq(seq)), } } pub fn eq_value(key: impl ToString, value: Vec<u8>) -> Self { - Self::match_value(key, pb::txn_condition::ConditionResult::Eq, value) + Self::match_value(key, ConditionResult::Eq, value) } - pub fn match_value( - key: impl ToString, - op: pb::txn_condition::ConditionResult, - value: Vec<u8>, - ) -> Self { + pub fn match_value(key: impl ToString, op: ConditionResult, value: Vec<u8>) -> Self { Self { key: key.to_string(), expected: op as i32, - target: Some(pb::txn_condition::Target::Value(value)), + target: Some(Target::Value(value)), + } + } + + /// Assert that there are exactly `n` keys with the given prefix. + /// + /// Usually, the prefix should end with a slash `/`. + pub fn keys_with_prefix(prefix: impl ToString, n: u64) -> Self { + Self::match_keys_with_prefix(prefix, ConditionResult::Eq, n) + } + + /// Compare the number of keys with the given prefix against the given `count`. + /// + /// Usually, the prefix should end with a slash `/`. + pub fn match_keys_with_prefix(prefix: impl ToString, op: ConditionResult, count: u64) -> Self { + Self { + key: prefix.to_string(), + expected: op as i32, + target: Some(Target::KeysWithPrefix(count)), + } + } }
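A minimal usage sketch of the new condition target, assuming the helpers above are exported from `databend-common-meta-types` and that `TxnOp::put` and `ConditionResult` are used as in the test suite earlier in this patch; the import paths, the function name, and the `jobs/` key names are illustrative assumptions, not taken from the patch itself:

```rust
use databend_common_meta_types::protobuf::txn_condition::ConditionResult;
use databend_common_meta_types::TxnCondition;
use databend_common_meta_types::TxnOp;
use databend_common_meta_types::TxnRequest;

/// Build a transaction that commits a marker key only when exactly three
/// keys exist under the `jobs/` prefix; otherwise it records the mismatch.
fn guarded_txn() -> TxnRequest {
    TxnRequest {
        // Note the trailing slash, as recommended by the proto comment above.
        condition: vec![TxnCondition::match_keys_with_prefix(
            "jobs/",
            ConditionResult::Eq,
            3,
        )],
        if_then: vec![TxnOp::put("jobs/committed", b"ok".to_vec())],
        else_then: vec![TxnOp::put("jobs/rejected", b"count mismatch".to_vec())],
    }
}
```

For the plain equality case, `TxnCondition::keys_with_prefix("jobs/", 3)` is the shorthand defined above for the same condition.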