From 66c3cd5964a62283d9ce660299e53e09109377d3 Mon Sep 17 00:00:00 2001 From: blaginin Date: Fri, 13 Dec 2024 12:39:03 +0000 Subject: [PATCH] Deprecate and ignore `enable_options_value_normalization` --- datafusion-cli/Cargo.lock | 1 + datafusion/common/Cargo.toml | 1 + datafusion/common/src/config.rs | 13 ++-- datafusion/sql/src/planner.rs | 33 +-------- datafusion/sql/src/statement.rs | 3 +- datafusion/sql/tests/sql_integration.rs | 69 +------------------ .../test_files/information_schema.slt | 2 +- docs/source/user-guide/configs.md | 2 +- 8 files changed, 18 insertions(+), 106 deletions(-) diff --git a/datafusion-cli/Cargo.lock b/datafusion-cli/Cargo.lock index a0a89fb3d14f..adb04a9a5a6e 100644 --- a/datafusion-cli/Cargo.lock +++ b/datafusion-cli/Cargo.lock @@ -1329,6 +1329,7 @@ dependencies = [ "hashbrown 0.14.5", "indexmap", "libc", + "log", "object_store", "parquet", "paste", diff --git a/datafusion/common/Cargo.toml b/datafusion/common/Cargo.toml index 82909404e455..a81ec724dd66 100644 --- a/datafusion/common/Cargo.toml +++ b/datafusion/common/Cargo.toml @@ -57,6 +57,7 @@ half = { workspace = true } hashbrown = { workspace = true } indexmap = { workspace = true } libc = "0.2.140" +log = { workspace = true } object_store = { workspace = true, optional = true } parquet = { workspace = true, optional = true, default-features = true } paste = "1.0.15" diff --git a/datafusion/common/src/config.rs b/datafusion/common/src/config.rs index de5ea49771bd..a1359f33fa91 100644 --- a/datafusion/common/src/config.rs +++ b/datafusion/common/src/config.rs @@ -30,7 +30,9 @@ use crate::{DataFusionError, Result}; /// A macro that wraps a configuration struct and automatically derives /// [`Default`] and [`ConfigField`] for it, allowing it to be used -/// in the [`ConfigOptions`] configuration tree +/// in the [`ConfigOptions`] configuration tree. +/// +/// transform is be used to normalize the value before parsing. /// /// For example, /// @@ -113,7 +115,7 @@ macro_rules! config_namespace { $vis:vis struct $struct_name:ident { $( $(#[doc = $d:tt])* - $field_vis:vis $field_name:ident : $field_type:ty, $(transform = $transform:expr,)? default = $default:expr + $field_vis:vis $field_name:ident : $field_type:ty, $(warn = $warn: expr,)? $(transform = $transform:expr,)? default = $default:expr )*$(,)* } ) => { @@ -135,6 +137,7 @@ macro_rules! config_namespace { $( stringify!($field_name) => { $(let value = $transform(value);)? + $(log::warn!($warn);)? self.$field_name.set(rem, value.as_ref()) }, )* @@ -218,8 +221,10 @@ config_namespace! { /// When set to true, SQL parser will normalize ident (convert ident to lowercase when not quoted) pub enable_ident_normalization: bool, default = true - /// When set to true, SQL parser will normalize options value (convert value to lowercase) - pub enable_options_value_normalization: bool, default = false + /// When set to true, SQL parser will normalize options value (convert value to lowercase). + /// Note that this option is ignored and will be removed in the future. All case-insensitive values + /// are normalized automatically. + pub enable_options_value_normalization: bool, warn = "`enable_options_value_normalization` is deprecated and ignored", default = false /// Configure the SQL dialect used by DataFusion's parser; supported values include: Generic, /// MySQL, PostgreSQL, Hive, SQLite, Snowflake, Redshift, MsSQL, ClickHouse, BigQuery, and Ansi. diff --git a/datafusion/sql/src/planner.rs b/datafusion/sql/src/planner.rs index 259c02f4cbd7..2d0ba8f8d994 100644 --- a/datafusion/sql/src/planner.rs +++ b/datafusion/sql/src/planner.rs @@ -24,10 +24,10 @@ use arrow_schema::*; use datafusion_common::{ field_not_found, internal_err, plan_datafusion_err, DFSchemaRef, SchemaError, }; +use sqlparser::ast::TimezoneInfo; use sqlparser::ast::{ArrayElemTypeDef, ExactNumberInfo}; use sqlparser::ast::{ColumnDef as SQLColumnDef, ColumnOption}; use sqlparser::ast::{DataType as SQLDataType, Ident, ObjectName, TableAlias}; -use sqlparser::ast::{TimezoneInfo, Value}; use datafusion_common::TableReference; use datafusion_common::{ @@ -38,7 +38,7 @@ use datafusion_expr::logical_plan::{LogicalPlan, LogicalPlanBuilder}; use datafusion_expr::utils::find_column_exprs; use datafusion_expr::{col, Expr}; -use crate::utils::{make_decimal_type, value_to_string}; +use crate::utils::make_decimal_type; pub use datafusion_expr::planner::ContextProvider; /// SQL parser options @@ -87,32 +87,6 @@ impl IdentNormalizer { } } -/// Value Normalizer -#[derive(Debug)] -pub struct ValueNormalizer { - normalize: bool, -} - -impl Default for ValueNormalizer { - fn default() -> Self { - Self { normalize: true } - } -} - -impl ValueNormalizer { - pub fn new(normalize: bool) -> Self { - Self { normalize } - } - - pub fn normalize(&self, value: Value) -> Option { - match (value_to_string(&value), self.normalize) { - (Some(s), true) => Some(s.to_ascii_lowercase()), - (Some(s), false) => Some(s), - (None, _) => None, - } - } -} - /// Struct to store the states used by the Planner. The Planner will leverage the states to resolve /// CTEs, Views, subqueries and PREPARE statements. The states include /// Common Table Expression (CTE) provided with WITH clause and @@ -254,7 +228,6 @@ pub struct SqlToRel<'a, S: ContextProvider> { pub(crate) context_provider: &'a S, pub(crate) options: ParserOptions, pub(crate) ident_normalizer: IdentNormalizer, - pub(crate) value_normalizer: ValueNormalizer, } impl<'a, S: ContextProvider> SqlToRel<'a, S> { @@ -266,13 +239,11 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { /// Create a new query planner pub fn new_with_options(context_provider: &'a S, options: ParserOptions) -> Self { let ident_normalize = options.enable_ident_normalization; - let options_value_normalize = options.enable_options_value_normalization; SqlToRel { context_provider, options, ident_normalizer: IdentNormalizer::new(ident_normalize), - value_normalizer: ValueNormalizer::new(options_value_normalize), } } diff --git a/datafusion/sql/src/statement.rs b/datafusion/sql/src/statement.rs index 38695f98b5fe..f750afbc4a53 100644 --- a/datafusion/sql/src/statement.rs +++ b/datafusion/sql/src/statement.rs @@ -1386,8 +1386,7 @@ impl SqlToRel<'_, S> { return plan_err!("Option {key} is specified multiple times"); } - let Some(value_string) = self.value_normalizer.normalize(value.clone()) - else { + let Some(value_string) = crate::utils::value_to_string(&value) else { return plan_err!("Unsupported Value {}", value); }; diff --git a/datafusion/sql/tests/sql_integration.rs b/datafusion/sql/tests/sql_integration.rs index 8c2d8ebad43f..9d52a2cc7b2a 100644 --- a/datafusion/sql/tests/sql_integration.rs +++ b/datafusion/sql/tests/sql_integration.rs @@ -29,11 +29,10 @@ use datafusion_common::{ }; use datafusion_expr::{ col, - dml::CopyTo, logical_plan::{LogicalPlan, Prepare}, test::function_stub::sum_udaf, - ColumnarValue, CreateExternalTable, CreateIndex, DdlStatement, ScalarUDF, - ScalarUDFImpl, Signature, Statement, Volatility, + ColumnarValue, CreateIndex, DdlStatement, ScalarUDF, ScalarUDFImpl, Signature, + Statement, Volatility, }; use datafusion_functions::{string, unicode}; use datafusion_sql::{ @@ -161,70 +160,6 @@ fn parse_ident_normalization() { } } -#[test] -fn test_parse_options_value_normalization() { - let test_data = [ - ( - "CREATE EXTERNAL TABLE test OPTIONS ('location' 'LoCaTiOn') STORED AS PARQUET LOCATION 'fake_location'", - "CreateExternalTable: Bare { table: \"test\" }", - HashMap::from([("format.location", "LoCaTiOn")]), - false, - ), - ( - "CREATE EXTERNAL TABLE test OPTIONS ('location' 'LoCaTiOn') STORED AS PARQUET LOCATION 'fake_location'", - "CreateExternalTable: Bare { table: \"test\" }", - HashMap::from([("format.location", "location")]), - true, - ), - ( - "COPY test TO 'fake_location' STORED AS PARQUET OPTIONS ('location' 'LoCaTiOn')", - "CopyTo: format=csv output_url=fake_location options: (format.location LoCaTiOn)\n TableScan: test", - HashMap::from([("format.location", "LoCaTiOn")]), - false, - ), - ( - "COPY test TO 'fake_location' STORED AS PARQUET OPTIONS ('location' 'LoCaTiOn')", - "CopyTo: format=csv output_url=fake_location options: (format.location location)\n TableScan: test", - HashMap::from([("format.location", "location")]), - true, - ), - ]; - - for (sql, expected_plan, expected_options, enable_options_value_normalization) in - test_data - { - let plan = logical_plan_with_options( - sql, - ParserOptions { - parse_float_as_decimal: false, - enable_ident_normalization: false, - support_varchar_with_length: false, - enable_options_value_normalization, - }, - ); - if let Ok(plan) = plan { - assert_eq!(expected_plan, format!("{plan}")); - - match plan { - LogicalPlan::Ddl(DdlStatement::CreateExternalTable( - CreateExternalTable { options, .. }, - )) - | LogicalPlan::Copy(CopyTo { options, .. }) => { - expected_options.iter().for_each(|(k, v)| { - assert_eq!(Some(&v.to_string()), options.get(*k)); - }); - } - _ => panic!( - "Expected Ddl(CreateExternalTable) or Copy(CopyTo) but got {:?}", - plan - ), - } - } else { - assert_eq!(expected_plan, plan.unwrap_err().strip_backtrace()); - } - } -} - #[test] fn select_no_relation() { quick_test( diff --git a/datafusion/sqllogictest/test_files/information_schema.slt b/datafusion/sqllogictest/test_files/information_schema.slt index ce348570c530..1f6b5f9852ec 100644 --- a/datafusion/sqllogictest/test_files/information_schema.slt +++ b/datafusion/sqllogictest/test_files/information_schema.slt @@ -351,7 +351,7 @@ datafusion.optimizer.skip_failed_rules false When set to true, the logical plan datafusion.optimizer.top_down_join_key_reordering true When set to true, the physical plan optimizer will run a top down process to reorder the join keys datafusion.sql_parser.dialect generic Configure the SQL dialect used by DataFusion's parser; supported values include: Generic, MySQL, PostgreSQL, Hive, SQLite, Snowflake, Redshift, MsSQL, ClickHouse, BigQuery, and Ansi. datafusion.sql_parser.enable_ident_normalization true When set to true, SQL parser will normalize ident (convert ident to lowercase when not quoted) -datafusion.sql_parser.enable_options_value_normalization false When set to true, SQL parser will normalize options value (convert value to lowercase) +datafusion.sql_parser.enable_options_value_normalization false When set to true, SQL parser will normalize options value (convert value to lowercase). Note that this option is ignored and will be removed in the future. All case-insensitive values are normalized automatically. datafusion.sql_parser.parse_float_as_decimal false When set to true, SQL parser will parse float as decimal type datafusion.sql_parser.support_varchar_with_length true If true, permit lengths for `VARCHAR` such as `VARCHAR(20)`, but ignore the length. If false, error if a `VARCHAR` with a length is specified. The Arrow type system does not have a notion of maximum string length and thus DataFusion can not enforce such limits. diff --git a/docs/source/user-guide/configs.md b/docs/source/user-guide/configs.md index 304b0efe5b65..77433c85cb66 100644 --- a/docs/source/user-guide/configs.md +++ b/docs/source/user-guide/configs.md @@ -122,6 +122,6 @@ Environment variables are read during `SessionConfig` initialisation so they mus | datafusion.explain.show_schema | false | When set to true, the explain statement will print schema information | | datafusion.sql_parser.parse_float_as_decimal | false | When set to true, SQL parser will parse float as decimal type | | datafusion.sql_parser.enable_ident_normalization | true | When set to true, SQL parser will normalize ident (convert ident to lowercase when not quoted) | -| datafusion.sql_parser.enable_options_value_normalization | false | When set to true, SQL parser will normalize options value (convert value to lowercase) | +| datafusion.sql_parser.enable_options_value_normalization | false | When set to true, SQL parser will normalize options value (convert value to lowercase). Note that this option is ignored and will be removed in the future. All case-insensitive values are normalized automatically. | | datafusion.sql_parser.dialect | generic | Configure the SQL dialect used by DataFusion's parser; supported values include: Generic, MySQL, PostgreSQL, Hive, SQLite, Snowflake, Redshift, MsSQL, ClickHouse, BigQuery, and Ansi. | | datafusion.sql_parser.support_varchar_with_length | true | If true, permit lengths for `VARCHAR` such as `VARCHAR(20)`, but ignore the length. If false, error if a `VARCHAR` with a length is specified. The Arrow type system does not have a notion of maximum string length and thus DataFusion can not enforce such limits. |