diff --git a/backends-clickhouse/src/main/scala/org/apache/gluten/utils/CHExpressionUtil.scala b/backends-clickhouse/src/main/scala/org/apache/gluten/utils/CHExpressionUtil.scala index 94d2895943d4..d47523c0f6ac 100644 --- a/backends-clickhouse/src/main/scala/org/apache/gluten/utils/CHExpressionUtil.scala +++ b/backends-clickhouse/src/main/scala/org/apache/gluten/utils/CHExpressionUtil.scala @@ -44,6 +44,15 @@ case class SequenceValidator() extends FunctionValidator { } } +case class UtcTimestampValidator() extends FunctionValidator { + override def doValidate(expr: Expression): Boolean = expr match { + // CH backend doest not support non-const timezone parameter + case t: ToUTCTimestamp => t.children(1).isInstanceOf[Literal] + case f: FromUTCTimestamp => f.children(1).isInstanceOf[Literal] + case _ => false + } +} + case class UnixTimeStampValidator() extends FunctionValidator { final val DATE_TYPE = "date" @@ -194,8 +203,8 @@ object CHExpressionUtil { REGR_SLOPE -> DefaultValidator(), REGR_INTERCEPT -> DefaultValidator(), REGR_SXY -> DefaultValidator(), - TO_UTC_TIMESTAMP -> DefaultValidator(), - FROM_UTC_TIMESTAMP -> DefaultValidator(), + TO_UTC_TIMESTAMP -> UtcTimestampValidator(), + FROM_UTC_TIMESTAMP -> UtcTimestampValidator(), UNIX_MILLIS -> DefaultValidator(), UNIX_MICROS -> DefaultValidator(), TIMESTAMP_MILLIS -> DefaultValidator(), diff --git a/cpp-ch/local-engine/Common/CHUtil.cpp b/cpp-ch/local-engine/Common/CHUtil.cpp index 62b42f981168..fa6124cf011f 100644 --- a/cpp-ch/local-engine/Common/CHUtil.cpp +++ b/cpp-ch/local-engine/Common/CHUtil.cpp @@ -586,7 +586,7 @@ void BackendInitializerUtil::initEnvs(DB::Context::ConfigurationPtr config) if (config->has("timezone")) { const std::string config_timezone = config->getString("timezone"); - const String mapped_timezone = DateLUT::mappingForJavaTimezone(config_timezone); + const String mapped_timezone = DateTimeUtil::convertTimeZone(config_timezone); if (0 != setenv("TZ", mapped_timezone.data(), 1)) // NOLINT(concurrency-mt-unsafe) // ok if not called concurrently with other setenv/getenv throw Poco::Exception("Cannot setenv TZ variable"); @@ -659,11 +659,7 @@ void BackendInitializerUtil::initSettings(std::map & b } else if (key == SPARK_SESSION_TIME_ZONE) { - String time_zone_val = value; - /// Convert timezone ID like '+8:00' to GMT+8:00 - if (value.starts_with("+") || value.starts_with("-")) - time_zone_val = "GMT" + value; - time_zone_val = DateLUT::mappingForJavaTimezone(time_zone_val); + String time_zone_val = DateTimeUtil::convertTimeZone(value); settings.set("session_timezone", time_zone_val); LOG_DEBUG(&Poco::Logger::get("CHUtil"), "Set settings key:{} value:{}", "session_timezone", time_zone_val); } @@ -937,6 +933,16 @@ Int64 DateTimeUtil::currentTimeMillis() return timeInMilliseconds(std::chrono::system_clock::now()); } +String DateTimeUtil::convertTimeZone(const String & time_zone) +{ + String res = time_zone; + /// Convert timezone ID like '+08:00' to GMT+8:00 + if (time_zone.starts_with("+") || time_zone.starts_with("-")) + res = "GMT" + time_zone; + res = DateLUT::mappingForJavaTimezone(res); + return res; +} + UInt64 MemoryUtil::getCurrentMemoryUsage(size_t depth) { Int64 current_memory_usage = 0; diff --git a/cpp-ch/local-engine/Common/CHUtil.h b/cpp-ch/local-engine/Common/CHUtil.h index 2ef3c6ef99df..50de9461f4de 100644 --- a/cpp-ch/local-engine/Common/CHUtil.h +++ b/cpp-ch/local-engine/Common/CHUtil.h @@ -225,6 +225,7 @@ class DateTimeUtil { public: static Int64 currentTimeMillis(); + static String convertTimeZone(const String & time_zone); }; class MemoryUtil diff --git a/cpp-ch/local-engine/Parser/SerializedPlanParser.h b/cpp-ch/local-engine/Parser/SerializedPlanParser.h index d0a16ec71a47..c79598c59923 100644 --- a/cpp-ch/local-engine/Parser/SerializedPlanParser.h +++ b/cpp-ch/local-engine/Parser/SerializedPlanParser.h @@ -57,7 +57,7 @@ static const std::map SCALAR_FUNCTIONS {"get_timestamp", "parseDateTimeInJodaSyntaxOrNull"}, // for spark function: to_date/to_timestamp {"quarter", "toQuarter"}, {"to_unix_timestamp", "parseDateTimeInJodaSyntaxOrNull"}, - // {"unix_timestamp", "toUnixTimestamp"}, + //{"unix_timestamp", "toUnixTimestamp"}, {"date_format", "formatDateTimeInJodaSyntax"}, {"timestamp_add", "timestamp_add"}, diff --git a/cpp-ch/local-engine/Parser/scalar_function_parser/fromUtcTimestamp.cpp b/cpp-ch/local-engine/Parser/scalar_function_parser/fromUtcTimestamp.cpp new file mode 100644 index 000000000000..8d23231055c3 --- /dev/null +++ b/cpp-ch/local-engine/Parser/scalar_function_parser/fromUtcTimestamp.cpp @@ -0,0 +1,35 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +namespace local_engine +{ + +class FunctionParserFromUtcTimestamp : public FunctionParserUtcTimestampTransform +{ +public: + explicit FunctionParserFromUtcTimestamp(SerializedPlanParser * plan_parser_) : FunctionParserUtcTimestampTransform(plan_parser_) { } + ~FunctionParserFromUtcTimestamp() = default; + + static constexpr auto name = "from_utc_timestamp"; + String getCHFunctionName(const substrait::Expression_ScalarFunction &) const override { return "from_utc_timestamp"; } + String getName() const override { return "from_utc_timestamp"; } +}; + +static FunctionParserRegister fromUtcTimestamp; +} diff --git a/cpp-ch/local-engine/Parser/scalar_function_parser/toUtcTimestamp.cpp b/cpp-ch/local-engine/Parser/scalar_function_parser/toUtcTimestamp.cpp new file mode 100644 index 000000000000..4b04942bab31 --- /dev/null +++ b/cpp-ch/local-engine/Parser/scalar_function_parser/toUtcTimestamp.cpp @@ -0,0 +1,35 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +namespace local_engine +{ + +class FunctionParserToUtcTimestamp : public FunctionParserUtcTimestampTransform +{ +public: + explicit FunctionParserToUtcTimestamp(SerializedPlanParser * plan_parser_) : FunctionParserUtcTimestampTransform(plan_parser_) { } + ~FunctionParserToUtcTimestamp() = default; + + static constexpr auto name = "to_utc_timestamp"; + String getCHFunctionName(const substrait::Expression_ScalarFunction &) const override { return "to_utc_timestamp"; } + String getName() const override { return "to_utc_timestamp"; } +}; + +static FunctionParserRegister toUtcTimestamp; +} diff --git a/cpp-ch/local-engine/Parser/scalar_function_parser/utcTimestampTransform.h b/cpp-ch/local-engine/Parser/scalar_function_parser/utcTimestampTransform.h new file mode 100644 index 000000000000..87ea19024169 --- /dev/null +++ b/cpp-ch/local-engine/Parser/scalar_function_parser/utcTimestampTransform.h @@ -0,0 +1,61 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ +extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; +extern const int ILLEGAL_TYPE_OF_ARGUMENT; +} +} + +namespace local_engine +{ + +class FunctionParserUtcTimestampTransform : public FunctionParser +{ +public: + explicit FunctionParserUtcTimestampTransform(SerializedPlanParser * plan_parser_) : FunctionParser(plan_parser_) { } + ~FunctionParserUtcTimestampTransform() override = default; + + const ActionsDAG::Node * parse(const substrait::Expression_ScalarFunction & substrait_func, ActionsDAGPtr & actions_dag) const override + { + /// Convert timezone value to clickhouse backend supported, i.e. GMT+8 -> Etc/GMT-8, +08:00 -> Etc/GMT-8 + if (substrait_func.arguments_size() != 2) + throw DB::Exception(DB::ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {}'s must have 2 arguments", getName()); + + const substrait::Expression & arg1 = substrait_func.arguments()[1].value(); + if (!arg1.has_literal() || !arg1.literal().has_string()) + throw DB::Exception(DB::ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Function {}'s 2nd argument should be string literal", getName()); + + const String & arg1_literal = arg1.literal().string(); + String time_zone_val = DateTimeUtil::convertTimeZone(arg1_literal); + auto parsed_args = parseFunctionArguments(substrait_func, "", actions_dag); + auto nullable_string_type = DB::makeNullable(std::make_shared()); + const auto * time_zone_node = addColumnToActionsDAG(actions_dag, nullable_string_type, time_zone_val); + const auto * result_node = toFunctionNode(actions_dag, getCHFunctionName(substrait_func), {parsed_args[0], time_zone_node}); + return convertNodeTypeIfNeeded(substrait_func, result_node, actions_dag); + } +}; +} diff --git a/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala b/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala index 2c34baa6379a..a8a5a1e412a8 100644 --- a/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala +++ b/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala @@ -300,9 +300,7 @@ class ClickHouseTestSettings extends BackendTestSettings { .exclude("SPARK-30793: truncate timestamps before the epoch to seconds and minutes") .excludeGlutenTest("unix_timestamp") .excludeGlutenTest("to_unix_timestamp") - .exclude("to_utc_timestamp with literal zone") .exclude("to_utc_timestamp with column zone") - .exclude("from_utc_timestamp with literal zone") .exclude("from_utc_timestamp with column zone") enableSuite[GlutenDeprecatedAPISuite] enableSuite[GlutenDynamicPartitionPruningV1SuiteAEOff].excludeGlutenTest( diff --git a/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala b/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala index bb782fde3d4d..cb33f002553b 100644 --- a/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala +++ b/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala @@ -326,9 +326,7 @@ class ClickHouseTestSettings extends BackendTestSettings { .exclude("SPARK-30793: truncate timestamps before the epoch to seconds and minutes") .excludeGlutenTest("unix_timestamp") .excludeGlutenTest("to_unix_timestamp") - .exclude("to_utc_timestamp with literal zone") .exclude("to_utc_timestamp with column zone") - .exclude("from_utc_timestamp with literal zone") .exclude("from_utc_timestamp with column zone") enableSuite[GlutenDeprecatedAPISuite] enableSuite[GlutenDynamicPartitionPruningV1SuiteAEOff].excludeGlutenTest( diff --git a/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala b/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala index 7a38774273ac..07af1fa845ca 100644 --- a/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala +++ b/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala @@ -324,9 +324,7 @@ class ClickHouseTestSettings extends BackendTestSettings { .exclude("SPARK-30793: truncate timestamps before the epoch to seconds and minutes") .excludeGlutenTest("unix_timestamp") .excludeGlutenTest("to_unix_timestamp") - .exclude("to_utc_timestamp with literal zone") .exclude("to_utc_timestamp with column zone") - .exclude("from_utc_timestamp with literal zone") .exclude("from_utc_timestamp with column zone") enableSuite[GlutenDeprecatedAPISuite] enableSuite[GlutenDynamicPartitionPruningV1SuiteAEOff].excludeGlutenTest( diff --git a/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala b/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala index 7a38774273ac..07af1fa845ca 100644 --- a/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala +++ b/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala @@ -324,9 +324,7 @@ class ClickHouseTestSettings extends BackendTestSettings { .exclude("SPARK-30793: truncate timestamps before the epoch to seconds and minutes") .excludeGlutenTest("unix_timestamp") .excludeGlutenTest("to_unix_timestamp") - .exclude("to_utc_timestamp with literal zone") .exclude("to_utc_timestamp with column zone") - .exclude("from_utc_timestamp with literal zone") .exclude("from_utc_timestamp with column zone") enableSuite[GlutenDeprecatedAPISuite] enableSuite[GlutenDynamicPartitionPruningV1SuiteAEOff].excludeGlutenTest(