Skip to content

Commit

Permalink
Deprecate and ignore enable_options_value_normalization
Browse files Browse the repository at this point in the history
  • Loading branch information
blaginin committed Dec 13, 2024
1 parent 947d1c2 commit 66c3cd5
Show file tree
Hide file tree
Showing 8 changed files with 18 additions and 106 deletions.
1 change: 1 addition & 0 deletions datafusion-cli/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions datafusion/common/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ half = { workspace = true }
hashbrown = { workspace = true }
indexmap = { workspace = true }
libc = "0.2.140"
log = { workspace = true }
object_store = { workspace = true, optional = true }
parquet = { workspace = true, optional = true, default-features = true }
paste = "1.0.15"
Expand Down
13 changes: 9 additions & 4 deletions datafusion/common/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,9 @@ use crate::{DataFusionError, Result};

/// A macro that wraps a configuration struct and automatically derives
/// [`Default`] and [`ConfigField`] for it, allowing it to be used
/// in the [`ConfigOptions`] configuration tree
/// in the [`ConfigOptions`] configuration tree.
///
/// transform is be used to normalize the value before parsing.
///
/// For example,
///
Expand Down Expand Up @@ -113,7 +115,7 @@ macro_rules! config_namespace {
$vis:vis struct $struct_name:ident {
$(
$(#[doc = $d:tt])*
$field_vis:vis $field_name:ident : $field_type:ty, $(transform = $transform:expr,)? default = $default:expr
$field_vis:vis $field_name:ident : $field_type:ty, $(warn = $warn: expr,)? $(transform = $transform:expr,)? default = $default:expr
)*$(,)*
}
) => {
Expand All @@ -135,6 +137,7 @@ macro_rules! config_namespace {
$(
stringify!($field_name) => {
$(let value = $transform(value);)?
$(log::warn!($warn);)?
self.$field_name.set(rem, value.as_ref())
},
)*
Expand Down Expand Up @@ -218,8 +221,10 @@ config_namespace! {
/// When set to true, SQL parser will normalize ident (convert ident to lowercase when not quoted)
pub enable_ident_normalization: bool, default = true

/// When set to true, SQL parser will normalize options value (convert value to lowercase)
pub enable_options_value_normalization: bool, default = false
/// When set to true, SQL parser will normalize options value (convert value to lowercase).
/// Note that this option is ignored and will be removed in the future. All case-insensitive values
/// are normalized automatically.
pub enable_options_value_normalization: bool, warn = "`enable_options_value_normalization` is deprecated and ignored", default = false

/// Configure the SQL dialect used by DataFusion's parser; supported values include: Generic,
/// MySQL, PostgreSQL, Hive, SQLite, Snowflake, Redshift, MsSQL, ClickHouse, BigQuery, and Ansi.
Expand Down
33 changes: 2 additions & 31 deletions datafusion/sql/src/planner.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,10 @@ use arrow_schema::*;
use datafusion_common::{
field_not_found, internal_err, plan_datafusion_err, DFSchemaRef, SchemaError,
};
use sqlparser::ast::TimezoneInfo;
use sqlparser::ast::{ArrayElemTypeDef, ExactNumberInfo};
use sqlparser::ast::{ColumnDef as SQLColumnDef, ColumnOption};
use sqlparser::ast::{DataType as SQLDataType, Ident, ObjectName, TableAlias};
use sqlparser::ast::{TimezoneInfo, Value};

use datafusion_common::TableReference;
use datafusion_common::{
Expand All @@ -38,7 +38,7 @@ use datafusion_expr::logical_plan::{LogicalPlan, LogicalPlanBuilder};
use datafusion_expr::utils::find_column_exprs;
use datafusion_expr::{col, Expr};

use crate::utils::{make_decimal_type, value_to_string};
use crate::utils::make_decimal_type;
pub use datafusion_expr::planner::ContextProvider;

/// SQL parser options
Expand Down Expand Up @@ -87,32 +87,6 @@ impl IdentNormalizer {
}
}

/// Value Normalizer
#[derive(Debug)]
pub struct ValueNormalizer {
normalize: bool,
}

impl Default for ValueNormalizer {
fn default() -> Self {
Self { normalize: true }
}
}

impl ValueNormalizer {
pub fn new(normalize: bool) -> Self {
Self { normalize }
}

pub fn normalize(&self, value: Value) -> Option<String> {
match (value_to_string(&value), self.normalize) {
(Some(s), true) => Some(s.to_ascii_lowercase()),
(Some(s), false) => Some(s),
(None, _) => None,
}
}
}

/// Struct to store the states used by the Planner. The Planner will leverage the states to resolve
/// CTEs, Views, subqueries and PREPARE statements. The states include
/// Common Table Expression (CTE) provided with WITH clause and
Expand Down Expand Up @@ -254,7 +228,6 @@ pub struct SqlToRel<'a, S: ContextProvider> {
pub(crate) context_provider: &'a S,
pub(crate) options: ParserOptions,
pub(crate) ident_normalizer: IdentNormalizer,
pub(crate) value_normalizer: ValueNormalizer,
}

impl<'a, S: ContextProvider> SqlToRel<'a, S> {
Expand All @@ -266,13 +239,11 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
/// Create a new query planner
pub fn new_with_options(context_provider: &'a S, options: ParserOptions) -> Self {
let ident_normalize = options.enable_ident_normalization;
let options_value_normalize = options.enable_options_value_normalization;

SqlToRel {
context_provider,
options,
ident_normalizer: IdentNormalizer::new(ident_normalize),
value_normalizer: ValueNormalizer::new(options_value_normalize),
}
}

Expand Down
3 changes: 1 addition & 2 deletions datafusion/sql/src/statement.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1386,8 +1386,7 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
return plan_err!("Option {key} is specified multiple times");
}

let Some(value_string) = self.value_normalizer.normalize(value.clone())
else {
let Some(value_string) = crate::utils::value_to_string(&value) else {
return plan_err!("Unsupported Value {}", value);
};

Expand Down
69 changes: 2 additions & 67 deletions datafusion/sql/tests/sql_integration.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,11 +29,10 @@ use datafusion_common::{
};
use datafusion_expr::{
col,
dml::CopyTo,
logical_plan::{LogicalPlan, Prepare},
test::function_stub::sum_udaf,
ColumnarValue, CreateExternalTable, CreateIndex, DdlStatement, ScalarUDF,
ScalarUDFImpl, Signature, Statement, Volatility,
ColumnarValue, CreateIndex, DdlStatement, ScalarUDF, ScalarUDFImpl, Signature,
Statement, Volatility,
};
use datafusion_functions::{string, unicode};
use datafusion_sql::{
Expand Down Expand Up @@ -161,70 +160,6 @@ fn parse_ident_normalization() {
}
}

#[test]
fn test_parse_options_value_normalization() {
let test_data = [
(
"CREATE EXTERNAL TABLE test OPTIONS ('location' 'LoCaTiOn') STORED AS PARQUET LOCATION 'fake_location'",
"CreateExternalTable: Bare { table: \"test\" }",
HashMap::from([("format.location", "LoCaTiOn")]),
false,
),
(
"CREATE EXTERNAL TABLE test OPTIONS ('location' 'LoCaTiOn') STORED AS PARQUET LOCATION 'fake_location'",
"CreateExternalTable: Bare { table: \"test\" }",
HashMap::from([("format.location", "location")]),
true,
),
(
"COPY test TO 'fake_location' STORED AS PARQUET OPTIONS ('location' 'LoCaTiOn')",
"CopyTo: format=csv output_url=fake_location options: (format.location LoCaTiOn)\n TableScan: test",
HashMap::from([("format.location", "LoCaTiOn")]),
false,
),
(
"COPY test TO 'fake_location' STORED AS PARQUET OPTIONS ('location' 'LoCaTiOn')",
"CopyTo: format=csv output_url=fake_location options: (format.location location)\n TableScan: test",
HashMap::from([("format.location", "location")]),
true,
),
];

for (sql, expected_plan, expected_options, enable_options_value_normalization) in
test_data
{
let plan = logical_plan_with_options(
sql,
ParserOptions {
parse_float_as_decimal: false,
enable_ident_normalization: false,
support_varchar_with_length: false,
enable_options_value_normalization,
},
);
if let Ok(plan) = plan {
assert_eq!(expected_plan, format!("{plan}"));

match plan {
LogicalPlan::Ddl(DdlStatement::CreateExternalTable(
CreateExternalTable { options, .. },
))
| LogicalPlan::Copy(CopyTo { options, .. }) => {
expected_options.iter().for_each(|(k, v)| {
assert_eq!(Some(&v.to_string()), options.get(*k));
});
}
_ => panic!(
"Expected Ddl(CreateExternalTable) or Copy(CopyTo) but got {:?}",
plan
),
}
} else {
assert_eq!(expected_plan, plan.unwrap_err().strip_backtrace());
}
}
}

#[test]
fn select_no_relation() {
quick_test(
Expand Down
2 changes: 1 addition & 1 deletion datafusion/sqllogictest/test_files/information_schema.slt
Original file line number Diff line number Diff line change
Expand Up @@ -351,7 +351,7 @@ datafusion.optimizer.skip_failed_rules false When set to true, the logical plan
datafusion.optimizer.top_down_join_key_reordering true When set to true, the physical plan optimizer will run a top down process to reorder the join keys
datafusion.sql_parser.dialect generic Configure the SQL dialect used by DataFusion's parser; supported values include: Generic, MySQL, PostgreSQL, Hive, SQLite, Snowflake, Redshift, MsSQL, ClickHouse, BigQuery, and Ansi.
datafusion.sql_parser.enable_ident_normalization true When set to true, SQL parser will normalize ident (convert ident to lowercase when not quoted)
datafusion.sql_parser.enable_options_value_normalization false When set to true, SQL parser will normalize options value (convert value to lowercase)
datafusion.sql_parser.enable_options_value_normalization false When set to true, SQL parser will normalize options value (convert value to lowercase). Note that this option is ignored and will be removed in the future. All case-insensitive values are normalized automatically.
datafusion.sql_parser.parse_float_as_decimal false When set to true, SQL parser will parse float as decimal type
datafusion.sql_parser.support_varchar_with_length true If true, permit lengths for `VARCHAR` such as `VARCHAR(20)`, but ignore the length. If false, error if a `VARCHAR` with a length is specified. The Arrow type system does not have a notion of maximum string length and thus DataFusion can not enforce such limits.

Expand Down
2 changes: 1 addition & 1 deletion docs/source/user-guide/configs.md
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,6 @@ Environment variables are read during `SessionConfig` initialisation so they mus
| datafusion.explain.show_schema | false | When set to true, the explain statement will print schema information |
| datafusion.sql_parser.parse_float_as_decimal | false | When set to true, SQL parser will parse float as decimal type |
| datafusion.sql_parser.enable_ident_normalization | true | When set to true, SQL parser will normalize ident (convert ident to lowercase when not quoted) |
| datafusion.sql_parser.enable_options_value_normalization | false | When set to true, SQL parser will normalize options value (convert value to lowercase) |
| datafusion.sql_parser.enable_options_value_normalization | false | When set to true, SQL parser will normalize options value (convert value to lowercase). Note that this option is ignored and will be removed in the future. All case-insensitive values are normalized automatically. |
| datafusion.sql_parser.dialect | generic | Configure the SQL dialect used by DataFusion's parser; supported values include: Generic, MySQL, PostgreSQL, Hive, SQLite, Snowflake, Redshift, MsSQL, ClickHouse, BigQuery, and Ansi. |
| datafusion.sql_parser.support_varchar_with_length | true | If true, permit lengths for `VARCHAR` such as `VARCHAR(20)`, but ignore the length. If false, error if a `VARCHAR` with a length is specified. The Arrow type system does not have a notion of maximum string length and thus DataFusion can not enforce such limits. |

0 comments on commit 66c3cd5

Please sign in to comment.