From 5ad1c6926a56d97de88765396cd4ffafd29f0b63 Mon Sep 17 00:00:00 2001 From: NEUpanning Date: Fri, 27 Sep 2024 10:49:41 +0800 Subject: [PATCH 1/2] Enable Spark legacy date formatter if spark.sql.legacy.timeParserPolicy is set to 'LEGACY'. --- cpp/core/config/GlutenConfig.h | 2 ++ cpp/velox/compute/WholeStageResultIterator.cc | 5 +++++ .../src/main/scala/org/apache/gluten/GlutenConfig.scala | 2 ++ 3 files changed, 9 insertions(+) diff --git a/cpp/core/config/GlutenConfig.h b/cpp/core/config/GlutenConfig.h index cd5196aa8c13..47e5558f280e 100644 --- a/cpp/core/config/GlutenConfig.h +++ b/cpp/core/config/GlutenConfig.h @@ -71,6 +71,8 @@ const std::string kIaaBackendName = "iaa"; const std::string kSparkRedactionRegex = "spark.redaction.regex"; const std::string kSparkRedactionString = "*********(redacted)"; +const std::string kSparkLegacyTimeParserPolicy = "spark.sql.legacy.timeParserPolicy"; + std::unordered_map parseConfMap(JNIEnv* env, const uint8_t* planData, const int32_t planDataLength); diff --git a/cpp/velox/compute/WholeStageResultIterator.cc b/cpp/velox/compute/WholeStageResultIterator.cc index 479aa9ff0fef..523e4354df47 100644 --- a/cpp/velox/compute/WholeStageResultIterator.cc +++ b/cpp/velox/compute/WholeStageResultIterator.cc @@ -520,6 +520,11 @@ std::unordered_map WholeStageResultIterator::getQueryC configs[velox::core::QueryConfig::kSparkPartitionId] = std::to_string(taskInfo_.partitionId); + // Enable Spark legacy date formatter if spark.sql.legacy.timeParserPolicy is set to 'LEGACY'. + if (veloxCfg_->get(kSparkLegacyTimeParserPolicy, "") == "LEGACY") { + configs[velox::core::QueryConfig::kSparkLegacyDateFormatter] = "true"; + } + } catch (const std::invalid_argument& err) { std::string errDetails = err.what(); throw std::runtime_error("Invalid conf arg: " + errDetails); diff --git a/shims/common/src/main/scala/org/apache/gluten/GlutenConfig.scala b/shims/common/src/main/scala/org/apache/gluten/GlutenConfig.scala index 7a384f762074..eaa5387512a4 100644 --- a/shims/common/src/main/scala/org/apache/gluten/GlutenConfig.scala +++ b/shims/common/src/main/scala/org/apache/gluten/GlutenConfig.scala @@ -665,6 +665,7 @@ object GlutenConfig { SQLConf.SESSION_LOCAL_TIMEZONE.key, GLUTEN_DEFAULT_SESSION_TIMEZONE_KEY, SQLConf.LEGACY_SIZE_OF_NULL.key, + SQLConf.LEGACY_TIME_PARSER_POLICY.key, "spark.io.compression.codec", "spark.sql.decimalOperations.allowPrecisionLoss", COLUMNAR_VELOX_BLOOM_FILTER_EXPECTED_NUM_ITEMS.key, @@ -695,6 +696,7 @@ object GlutenConfig { val keyWithDefault = ImmutableList.of( (SQLConf.CASE_SENSITIVE.key, SQLConf.CASE_SENSITIVE.defaultValueString), (SQLConf.IGNORE_MISSING_FILES.key, SQLConf.IGNORE_MISSING_FILES.defaultValueString), + (SQLConf.LEGACY_TIME_PARSER_POLICY.key, SQLConf.LEGACY_TIME_PARSER_POLICY.defaultValueString), ( COLUMNAR_MEMORY_BACKTRACE_ALLOCATION.key, COLUMNAR_MEMORY_BACKTRACE_ALLOCATION.defaultValueString), From e55f2ea4500cd0963295ca0ad4e5b324f43afa4d Mon Sep 17 00:00:00 2001 From: NEUpanning Date: Wed, 9 Oct 2024 16:01:49 +0800 Subject: [PATCH 2/2] updates --- cpp/velox/compute/WholeStageResultIterator.cc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cpp/velox/compute/WholeStageResultIterator.cc b/cpp/velox/compute/WholeStageResultIterator.cc index 523e4354df47..850279041634 100644 --- a/cpp/velox/compute/WholeStageResultIterator.cc +++ b/cpp/velox/compute/WholeStageResultIterator.cc @@ -523,6 +523,8 @@ std::unordered_map WholeStageResultIterator::getQueryC // Enable Spark legacy date formatter if spark.sql.legacy.timeParserPolicy is set to 'LEGACY'. if (veloxCfg_->get(kSparkLegacyTimeParserPolicy, "") == "LEGACY") { configs[velox::core::QueryConfig::kSparkLegacyDateFormatter] = "true"; + } else { + configs[velox::core::QueryConfig::kSparkLegacyDateFormatter] = "false"; } } catch (const std::invalid_argument& err) {